23 changes: 23 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -764,4 +764,27 @@ let SMETargetGuard = "sme-f16f16" in {
[ImmCheck<0, ImmCheck0_1>]>;
}


// Defines a pair of "svreadz" builtins per instantiation: NAME_H (the
// "svreadz_hor_" form) and NAME_V (the "svreadz_ver_" form). Both are guarded
// by the "sme2p1" target feature and flagged IsStreaming and IsInOutZA.
//
//   n_suffix - name fragment inserted after "svreadz_hor_" / "svreadz_ver_"
//              (e.g. "za8").
//   vg_num   - vector-group count; appended to the builtin name ("_vg<N>"),
//              prefixed to the prototype string ("<N>im") and appended to the
//              fifth SInst argument ("_horiz_x<N>" / "_vert_x<N>").
//   t        - type-spec string forwarded unchanged to SInst.
//   i_prefix - prefix of the fifth SInst argument (presumably the LLVM
//              intrinsic name -- confirm against the SInst definition).
//   ch       - immediate-range checks forwarded unchanged to SInst.
multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, list<ImmCheck> ch> {
let SMETargetGuard = "sme2p1" in {
// Horizontal variant.
def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
MergeNone, i_prefix # "_horiz_x" # vg_num,
[IsStreaming, IsInOutZA], ch>;

// Vertical variant; identical apart from the "ver"/"vert" name fragments.
def NAME # _V : SInst<"svreadz_ver_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t,
MergeNone, i_prefix # "_vert_x" #vg_num,
[IsStreaming, IsInOutZA], ch>;
}
}

// ZAReadz instantiations with vg_num "2", one per name suffix za8..za64.
// The range check on immediate operand 0 widens with the suffix:
// 0..0 for za8, 0..1 for za16, 0..3 for za32 and 0..7 for za64.
defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16_X2 : ZAReadz<"za16", "2", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32_X2 : ZAReadz<"za32", "2", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64_X2 : ZAReadz<"za64", "2", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;

// ZAReadz instantiations with vg_num "4"; same type specs and immediate
// ranges as the vg_num "2" group above.
defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;

} // let SVETargetGuard = InvalidMode
13 changes: 13 additions & 0 deletions clang/include/clang/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -3511,6 +3511,19 @@ class Parser : public CodeCompletionHandler {
/// metadirective and therefore ends on the closing paren.
StmtResult ParseOpenMPDeclarativeOrExecutableDirective(
ParsedStmtContext StmtCtx, bool ReadDirectiveWithinMetadirective = false);

/// Parses an OpenMP executable directive whose kind and starting location
/// are supplied by the caller.
///
/// \param StmtCtx The context in which we're parsing the directive.
/// \param DKind The kind of the executable directive.
/// \param Loc Source location of the beginning of the directive.
/// \param ReadDirectiveWithinMetadirective true if the directive is within a
/// metadirective and therefore ends on the closing paren.
StmtResult
ParseOpenMPExecutableDirective(ParsedStmtContext StmtCtx,
OpenMPDirectiveKind DKind, SourceLocation Loc,
bool ReadDirectiveWithinMetadirective);

/// Parses clause of kind \a CKind for directive of a kind \a Kind.
///
/// \param DKind Kind of current directive.
Expand Down
33 changes: 33 additions & 0 deletions clang/lib/AST/Interp/ByteCodeExprGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2930,6 +2930,39 @@ bool ByteCodeExprGen<Emitter>::VisitObjCBoxedExpr(const ObjCBoxedExpr *E) {
return this->delegate(E->getSubExpr());
}

/// Emits the bytecode that initializes the two fields of the record produced
/// by a CXXStdInitializerListExpr from its backing array sub-expression.
///
/// Field 0 is always initialized with the pointer produced by visiting the
/// sub-expression. Field 1 is either
///   * an integral field, initialized with the backing array's element count,
///     or
///   * a pointer field, initialized with a pointer derived from field 0 by
///     indexing with the backing array's element count.
///
/// \returns false as soon as any emit step fails, true otherwise.
template <class Emitter>
bool ByteCodeExprGen<Emitter>::VisitCXXStdInitializerListExpr(
    const CXXStdInitializerListExpr *E) {
  const Expr *Backing = E->getSubExpr();
  const ConstantArrayType *BackingTy =
      Ctx.getASTContext().getAsConstantArrayType(Backing->getType());
  const Record *ListRecord = getRecord(E->getType());
  assert(Initializing);
  assert(Backing->isGLValue());

  // Both helpers are evaluated lazily, at the point of use, so nothing is
  // dereferenced before the corresponding emit step is actually reached.
  auto FieldOffset = [ListRecord](unsigned Index) {
    return ListRecord->getField(Index)->Offset;
  };
  auto ElementCount = [BackingTy] {
    return static_cast<APSInt>(BackingTy->getSize());
  };

  // First field: pointer to the backing array.
  if (!(this->visit(Backing) && this->emitInitFieldPtr(FieldOffset(0u), E)))
    return false;

  const PrimType SizeOrEndT =
      classifyPrim(ListRecord->getField(1u)->Decl->getType());

  // Second field, integral flavour: store the element count directly.
  if (isIntegralType(SizeOrEndT))
    return this->emitConst(ElementCount(), SizeOrEndT, E) &&
           this->emitInitField(SizeOrEndT, FieldOffset(1u), E);

  // Second field, pointer flavour: re-read the first field, index it by the
  // element count and store the resulting pointer.
  assert(SizeOrEndT == PT_Ptr);
  return this->emitGetFieldPtr(FieldOffset(0u), E) &&
         this->emitConst(ElementCount(), PT_Uint64, E) &&
         this->emitArrayElemPtrPop(PT_Uint64, E) &&
         this->emitInitFieldPtr(FieldOffset(1u), E);
}

template <class Emitter> bool ByteCodeExprGen<Emitter>::discard(const Expr *E) {
OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/true,
/*NewInitializing=*/false);
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/Interp/ByteCodeExprGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ class ByteCodeExprGen : public ConstStmtVisitor<ByteCodeExprGen<Emitter>, bool>,
bool VisitShuffleVectorExpr(const ShuffleVectorExpr *E);
bool VisitExtVectorElementExpr(const ExtVectorElementExpr *E);
bool VisitObjCBoxedExpr(const ObjCBoxedExpr *E);
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E);

protected:
bool visitExpr(const Expr *E) override;
Expand Down
13 changes: 9 additions & 4 deletions clang/lib/AST/Interp/Interp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,9 @@ bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
if (!Ptr.isZero())
return true;
const SourceInfo &Loc = S.Current->getSource(OpPC);
S.FFDiag(Loc, diag::note_constexpr_null_subobject) << CSK;
S.FFDiag(Loc, diag::note_constexpr_null_subobject)
<< CSK << S.Current->getRange(OpPC);

return false;
}

Expand All @@ -350,7 +352,8 @@ bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
if (!Ptr.isOnePastEnd())
return true;
const SourceInfo &Loc = S.Current->getSource(OpPC);
S.FFDiag(Loc, diag::note_constexpr_access_past_end) << AK;
S.FFDiag(Loc, diag::note_constexpr_access_past_end)
<< AK << S.Current->getRange(OpPC);
return false;
}

Expand All @@ -359,7 +362,8 @@ bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
if (!Ptr.isElementPastEnd())
return true;
const SourceInfo &Loc = S.Current->getSource(OpPC);
S.FFDiag(Loc, diag::note_constexpr_past_end_subobject) << CSK;
S.FFDiag(Loc, diag::note_constexpr_past_end_subobject)
<< CSK << S.Current->getRange(OpPC);
return false;
}

Expand All @@ -369,7 +373,8 @@ bool CheckSubobject(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
return true;

const SourceInfo &Loc = S.Current->getSource(OpPC);
S.FFDiag(Loc, diag::note_constexpr_past_end_subobject) << CSK;
S.FFDiag(Loc, diag::note_constexpr_past_end_subobject)
<< CSK << S.Current->getRange(OpPC);
return false;
}

Expand Down
5 changes: 5 additions & 0 deletions clang/lib/AST/Interp/Pointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,8 @@ class Pointer {
unsigned getByteOffset() const {
if (isIntegralPointer())
return asIntPointer().Value + Offset;
if (isOnePastEnd())
return PastEndMark;
return Offset;
}

Expand Down Expand Up @@ -551,6 +553,9 @@ class Pointer {
if (!asBlockPointer().Pointee)
return false;

if (isUnknownSizeArray())
return false;

return isElementPastEnd() ||
(getSize() == getOffset() && !isZeroSizeArray());
}
Expand Down
17 changes: 11 additions & 6 deletions clang/lib/Analysis/FlowSensitive/Transfer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,17 +391,22 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
}
case UO_PreInc:
case UO_PreDec:
// Propagate the storage location, but don't create a new value; to
// avoid generating unnecessary values, we leave it to the specific
// analysis to do this if desired.
// Propagate the storage location and clear out any value associated with
// it (to represent the fact that the value has definitely changed).
// To avoid generating unnecessary values, we leave it to the specific
// analysis to create a new value if desired.
propagateStorageLocation(*S->getSubExpr(), *S, Env);
if (StorageLocation *Loc = Env.getStorageLocation(*S->getSubExpr()))
Env.clearValue(*Loc);
break;
case UO_PostInc:
case UO_PostDec:
// Propagate the old value, but don't create a new value; to avoid
// generating unnecessary values, we leave it to the specific analysis
// to do this if desired.
// Propagate the old value, then clear out any value associated with the
// storage location (to represent the fact that the value has definitely
// changed). See above for rationale.
propagateValue(*S->getSubExpr(), *S, Env);
if (StorageLocation *Loc = Env.getStorageLocation(*S->getSubExpr()))
Env.clearValue(*Loc);
break;
default:
break;
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18479,6 +18479,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
case AMDGPU::BI__builtin_amdgcn_permlane16:
case AMDGPU::BI__builtin_amdgcn_permlanex16:
return emitBuiltinWithOneOverloadedType<6>(
*this, E,
BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
? Intrinsic::amdgcn_permlane16
: Intrinsic::amdgcn_permlanex16);
case AMDGPU::BI__builtin_amdgcn_permlane64:
return emitBuiltinWithOneOverloadedType<1>(*this, E,
Intrinsic::amdgcn_permlane64);
case AMDGPU::BI__builtin_amdgcn_readlane:
return emitBuiltinWithOneOverloadedType<2>(*this, E,
Intrinsic::amdgcn_readlane);
Expand Down
1,370 changes: 74 additions & 1,296 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
CodeGenFunction &CGF,
const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) override;

/// Get the GPU warp size.
llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);

/// Get the id of the current thread on the GPU.
llvm::Value *getGPUThreadID(CodeGenFunction &CGF);

Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CodeGenAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,9 @@ void BackendConsumer::HandleTranslationUnit(ASTContext &C) {
Ctx.setDiagnosticHandler(std::make_unique<ClangDiagnosticHandler>(
CodeGenOpts, this));

Ctx.setDefaultTargetCPU(TargetOpts.CPU);
Ctx.setDefaultTargetFeatures(llvm::join(TargetOpts.Features, ","));

Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr =
setupLLVMOptimizationRemarks(
Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses,
Expand Down Expand Up @@ -1205,6 +1208,9 @@ void CodeGenAction::ExecuteAction() {
Ctx.setDiagnosticHandler(
std::make_unique<ClangDiagnosticHandler>(CodeGenOpts, &Result));

Ctx.setDefaultTargetCPU(TargetOpts.CPU);
Ctx.setDefaultTargetFeatures(llvm::join(TargetOpts.Features, ","));

Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr =
setupLLVMOptimizationRemarks(
Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses,
Expand Down
525 changes: 211 additions & 314 deletions clang/lib/Parse/ParseOpenMP.cpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions clang/lib/Sema/ScopeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ void FunctionScopeInfo::Clear() {
FirstReturnLoc = SourceLocation();
FirstCXXOrObjCTryLoc = SourceLocation();
FirstSEHTryLoc = SourceLocation();
FirstVLALoc = SourceLocation();
FoundImmediateEscalatingExpression = false;

// Coroutine state
Expand Down
4 changes: 4 additions & 0 deletions clang/test/AST/Interp/arrays.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ static_assert(foo[2][2] == nullptr, "");
static_assert(foo[2][3] == &m, "");
static_assert(foo[2][4] == nullptr, "");

constexpr int afterEnd[] = {1,2,3};
static_assert(&afterEnd[3] == afterEnd + 3, "");

constexpr int ZeroSizeArray[] = {};

constexpr int SomeInt[] = {1};
Expand Down Expand Up @@ -623,3 +626,4 @@ constexpr int *get2() {
extern int same_entity_2[];
return same_entity_2;
}
static_assert(get2() == same_entity_2, "failed to find previous decl");
1,415 changes: 1,415 additions & 0 deletions clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions clang/test/CodeGen/asan-frame-pointer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ int global;

// NONE: define internal void @asan.module_ctor() #[[#ATTR:]] {
// NONE: define internal void @asan.module_dtor() #[[#ATTR]] {
// NONE: attributes #[[#ATTR]] = { nounwind }
// NONE: attributes #[[#ATTR]] = { nounwind

// NONLEAF: define internal void @asan.module_ctor() #[[#ATTR:]] {
// NONLEAF: define internal void @asan.module_dtor() #[[#ATTR]] {
// NONLEAF: attributes #[[#ATTR]] = { nounwind "frame-pointer"="non-leaf" }
// NONLEAF: attributes #[[#ATTR]] = { nounwind "frame-pointer"="non-leaf"

// ALL: define internal void @asan.module_ctor() #[[#ATTR:]] {
// ALL: define internal void @asan.module_dtor() #[[#ATTR]] {
// ALL: attributes #[[#ATTR]] = { nounwind "frame-pointer"="all" }
// ALL: attributes #[[#ATTR]] = { nounwind "frame-pointer"="all"
4 changes: 2 additions & 2 deletions clang/test/CodeGen/asan-globals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,13 @@ void func() {
// CHECK-NEXT: call void @__asan_unregister_globals
// CHECK-NEXT: ret void

// CHECK: attributes #[[#ATTR]] = { nounwind }
// CHECK: attributes #[[#ATTR]] = { nounwind

/// If -fasynchronous-unwind-tables, set the module flag "uwtable". ctor/dtor
/// will thus get the uwtable attribute.
// RUN: %clang_cc1 -emit-llvm -fsanitize=address -funwind-tables=2 -o - %s | FileCheck %s --check-prefixes=UWTABLE
// UWTABLE: define internal void @asan.module_dtor() #[[#ATTR:]] {
// UWTABLE: attributes #[[#ATTR]] = { nounwind uwtable }
// UWTABLE: attributes #[[#ATTR]] = { nounwind uwtable
// UWTABLE: ![[#]] = !{i32 7, !"uwtable", i32 2}

// IGNORELIST-SRC: @{{.*}}extra_global{{.*}} ={{.*}} global
Expand Down
17 changes: 17 additions & 0 deletions clang/test/CodeGen/coverage-target-attr.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// RUN: %clang_cc1 -emit-llvm -coverage-notes-file=test.gcno -coverage-data-file=test.gcda -triple aarch64-linux-android30 -target-cpu generic -target-feature +tagged-globals -fsanitize=hwaddress %s -o %t
// RUN: FileCheck %s < %t

// CHECK: define internal void @__llvm_gcov_writeout() unnamed_addr [[ATTR:#[0-9]+]]
// CHECK: define internal void @__llvm_gcov_reset() unnamed_addr [[ATTR]]
// CHECK: define internal void @__llvm_gcov_init() unnamed_addr [[ATTR]]
// CHECK: define internal void @hwasan.module_ctor() [[ATTR2:#[0-9]+]]
// CHECK: attributes [[ATTR]] = {{.*}} "target-cpu"="generic" "target-features"="+tagged-globals"
// CHECK: attributes [[ATTR2]] = {{.*}} "target-cpu"="generic" "target-features"="+tagged-globals"

// Weak global read by main below.
__attribute__((weak)) int foo = 0;

// Weak, empty function called by main below.
__attribute__((weak)) void bar() {}

// Calls bar only when foo is non-zero, so both weak symbols are referenced.
int main() {
if (foo) bar();
}
2 changes: 1 addition & 1 deletion clang/test/CodeGen/sanitize-metadata-nosanitize.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ __attribute__((no_sanitize("all"))) int test_no_sanitize_all(int *x, int *y) {
// CHECK: attributes #1 = { mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
// CHECK: attributes #2 = { disable_sanitizer_instrumentation mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
// CHECK: attributes #3 = { mustprogress nofree norecurse nounwind willreturn memory(write, argmem: readwrite, inaccessiblemem: none) "min-legal-vector-width"="0" "no-trapping-math"="true" "no_sanitize_thread" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
// CHECK: attributes #4 = { nounwind }
// CHECK: attributes #4 = { nounwind "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
//.
// CHECK: !2 = !{!"sanmd_covered!C", !3}
// CHECK: !3 = !{i64 0}
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ typedef unsigned int uint;
typedef unsigned long ulong;

// CHECK-LABEL: @test_permlane16(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane16.i32(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0);
}

// CHECK-LABEL: @test_permlanex16(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlanex16(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlanex16.i32(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
}
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
}

// CHECK-LABEL: @test_permlane64(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane64(i32 %a)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane64.i32(i32 %a)
void test_permlane64(global uint* out, uint a) {
*out = __builtin_amdgcn_permlane64(a);
}
Expand Down
165 changes: 93 additions & 72 deletions clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

258 changes: 144 additions & 114 deletions clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp

Large diffs are not rendered by default.

96 changes: 96 additions & 0 deletions clang/test/OpenMP/reduction_complex.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
// RUN: -triple powerpc64le-unknown-unknown \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
// RUN: %t-ppc-host.bc

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
// RUN: -triple nvptx64-unknown-unknown -DCUA \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s \
// RUN: -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
// RUN: -o - | FileCheck %s --check-prefix CHECK

// expected-no-diagnostics
// Test input: accumulates the outer loop index into a _Complex float using a
// '+' reduction on a combined "target teams loop" directive. The two loops
// are collapsed, j is lastprivate and sum is mapped tofrom. The function
// itself always returns 0.
int foo() {
int i;
int j;
_Complex float sum = 0;

#pragma omp target teams loop reduction(+:sum) collapse(2) bind(parallel) order(concurrent) lastprivate(j) map(tofrom:sum)

for(i=0; i<10; i++)
for(j=0; j<10; j++)
sum += i;

return 0;
}
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: %[[VAL_228:.*]] = alloca ptr, align 8
// CHECK-NEXT: %[[VAL_229:.*]] = alloca i16, align 2
// CHECK-NEXT: %[[VAL_230:.*]] = alloca i16, align 2
// CHECK-NEXT: %[[VAL_231:.*]] = alloca i16, align 2
// CHECK-NEXT: %[[VAL_232:.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: %[[VAL_233:.*]] = alloca { float, float }, align 8
// CHECK-NEXT: store ptr %[[VAL_234:.*]], ptr %[[VAL_228]], align 8
// CHECK-NEXT: store i16 %[[VAL_235:.*]], ptr %[[VAL_229]], align 2
// CHECK-NEXT: store i16 %[[VAL_236:.*]], ptr %[[VAL_230]], align 2
// CHECK-NEXT: store i16 %[[VAL_237:.*]], ptr %[[VAL_231]], align 2
// CHECK-NEXT: %[[VAL_238:.*]] = load ptr, ptr %[[VAL_228]], align 8
// CHECK-NEXT: %[[VAL_239:.*]] = load i16, ptr %[[VAL_229]], align 2
// CHECK-NEXT: %[[VAL_240:.*]] = load i16, ptr %[[VAL_230]], align 2
// CHECK-NEXT: %[[VAL_241:.*]] = load i16, ptr %[[VAL_231]], align 2
// CHECK-NEXT: %[[VAL_242:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_238]], i64 0, i64 0
// CHECK-NEXT: %[[VAL_243:.*]] = load ptr, ptr %[[VAL_242]], align 8
// CHECK-NEXT: %[[VAL_244:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_232]], i64 0, i64 0
// CHECK-NEXT: %[[VAL_245:.*]] = getelementptr { float, float }, ptr %[[VAL_243]], i64 1
// CHECK-NEXT: %[[VAL_246:.*]] = load i64, ptr %[[VAL_243]], align 8
// CHECK-NEXT: %[[VAL_247:.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-NEXT: %[[VAL_248:.*]] = trunc i32 %[[VAL_247]] to i16
// CHECK-NEXT: %[[VAL_249:.*]] = call i64 @__kmpc_shuffle_int64(i64 %[[VAL_246]], i16 %[[VAL_240]], i16 %[[VAL_248]])
// CHECK-NEXT: store i64 %[[VAL_249]], ptr %[[VAL_233]], align 8
// CHECK-NEXT: %[[VAL_250:.*]] = getelementptr i64, ptr %[[VAL_243]], i64 1
// CHECK-NEXT: %[[VAL_251:.*]] = getelementptr i64, ptr %[[VAL_233]], i64 1
// CHECK-NEXT: store ptr %[[VAL_233]], ptr %[[VAL_244]], align 8
// CHECK-NEXT: %[[VAL_252:.*]] = icmp eq i16 %[[VAL_241]], 0
// CHECK-NEXT: %[[VAL_253:.*]] = icmp eq i16 %[[VAL_241]], 1
// CHECK-NEXT: %[[VAL_254:.*]] = icmp ult i16 %[[VAL_239]], %[[VAL_240]]
// CHECK-NEXT: %[[VAL_255:.*]] = and i1 %[[VAL_253]], %[[VAL_254]]
// CHECK-NEXT: %[[VAL_256:.*]] = icmp eq i16 %[[VAL_241]], 2
// CHECK-NEXT: %[[VAL_257:.*]] = and i16 %[[VAL_239]], 1
// CHECK-NEXT: %[[VAL_258:.*]] = icmp eq i16 %[[VAL_257]], 0
// CHECK-NEXT: %[[VAL_259:.*]] = and i1 %[[VAL_256]], %[[VAL_258]]
// CHECK-NEXT: %[[VAL_260:.*]] = icmp sgt i16 %[[VAL_240]], 0
// CHECK-NEXT: %[[VAL_261:.*]] = and i1 %[[VAL_259]], %[[VAL_260]]
// CHECK-NEXT: %[[VAL_262:.*]] = or i1 %[[VAL_252]], %[[VAL_255]]
// CHECK-NEXT: %[[VAL_263:.*]] = or i1 %[[VAL_262]], %[[VAL_261]]
// CHECK-NEXT: br i1 %[[VAL_263]], label %[[VAL_264:.*]], label %[[VAL_265:.*]]
// CHECK: then: ; preds = %[[VAL_266:.*]]
// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l{{[0-9]+}}_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr %[[VAL_238]], ptr %[[VAL_232]]) #2
// CHECK-NEXT: br label %[[VAL_267:.*]]
// CHECK: else: ; preds = %[[VAL_266]]
// CHECK-NEXT: br label %[[VAL_267]]
// CHECK: ifcont: ; preds = %[[VAL_265]], %[[VAL_264]]
// CHECK-NEXT: %[[VAL_268:.*]] = icmp eq i16 %[[VAL_241]], 1
// CHECK-NEXT: %[[VAL_269:.*]] = icmp uge i16 %[[VAL_239]], %[[VAL_240]]
// CHECK-NEXT: %[[VAL_270:.*]] = and i1 %[[VAL_268]], %[[VAL_269]]
// CHECK-NEXT: br i1 %[[VAL_270]], label %[[VAL_271:.*]], label %[[VAL_272:.*]]
// CHECK: then4: ; preds = %[[VAL_267]]
// CHECK-NEXT: %[[VAL_273:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_232]], i64 0, i64 0
// CHECK-NEXT: %[[VAL_274:.*]] = load ptr, ptr %[[VAL_273]], align 8
// CHECK-NEXT: %[[VAL_275:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_238]], i64 0, i64 0
// CHECK-NEXT: %[[VAL_276:.*]] = load ptr, ptr %[[VAL_275]], align 8
// CHECK-NEXT: %[[VAL_277:.*]] = getelementptr inbounds { float, float }, ptr %[[VAL_274]], i32 0, i32 0
// CHECK-NEXT: %[[VAL_278:.*]] = load float, ptr %[[VAL_277]], align 4
// CHECK-NEXT: %[[VAL_279:.*]] = getelementptr inbounds { float, float }, ptr %[[VAL_274]], i32 0, i32 1
// CHECK-NEXT: %[[VAL_280:.*]] = load float, ptr %[[VAL_279]], align 4
// CHECK-NEXT: %[[VAL_281:.*]] = getelementptr inbounds { float, float }, ptr %[[VAL_276]], i32 0, i32 0
// CHECK-NEXT: %[[VAL_282:.*]] = getelementptr inbounds { float, float }, ptr %[[VAL_276]], i32 0, i32 1
// CHECK-NEXT: store float %[[VAL_278]], ptr %[[VAL_281]], align 4
// CHECK-NEXT: store float %[[VAL_280]], ptr %[[VAL_282]], align 4
// CHECK-NEXT: br label %[[VAL_283:.*]]
// CHECK: else7: ; preds = %[[VAL_267]]
// CHECK-NEXT: br label %[[VAL_283]]
// CHECK: ifcont8: ; preds = %[[VAL_272]], %[[VAL_271]]
// CHECK-NEXT: ret void
9 changes: 5 additions & 4 deletions clang/test/OpenMP/reduction_implicit_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ int main()
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
Expand All @@ -249,6 +248,7 @@ int main()
// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK: body:
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
Expand All @@ -263,21 +263,22 @@ int main()
// CHECK: else:
// CHECK-NEXT: br label [[IFCONT]]
// CHECK: ifcont:
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK: then2:
// CHECK: then3:
// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
// CHECK-NEXT: br label [[IFCONT4:%.*]]
// CHECK: else3:
// CHECK: else4:
// CHECK-NEXT: br label [[IFCONT4]]
// CHECK: ifcont4:
// CHECK: ifcont5:
// CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
// CHECK-NEXT: br label [[PRECOND]]
Expand Down
30 changes: 16 additions & 14 deletions clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ int foo() {
// IR-GPU-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
// IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr
Expand All @@ -102,7 +103,6 @@ int foo() {
// IR-GPU-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr
// IR-GPU-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr
// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8
Expand Down Expand Up @@ -258,6 +258,7 @@ int foo() {
// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
Expand All @@ -275,7 +276,6 @@ int foo() {
// IR-GPU-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr
// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
// IR-GPU-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
Expand Down Expand Up @@ -399,12 +399,12 @@ int foo() {
// IR-GPU-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
// IR-GPU-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
// IR-GPU-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
// IR-GPU-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
Expand Down Expand Up @@ -480,10 +480,9 @@ int foo() {
// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
// IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
Expand All @@ -499,6 +498,7 @@ int foo() {
// IR-GPU-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100
// IR-GPU-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
// IR-GPU: body:
// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]])
// IR-GPU-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// IR-GPU-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
Expand All @@ -513,21 +513,22 @@ int foo() {
// IR-GPU: else:
// IR-GPU-NEXT: br label [[IFCONT]]
// IR-GPU: ifcont:
// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]])
// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// IR-GPU-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
// IR-GPU-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// IR-GPU: then2:
// IR-GPU: then3:
// IR-GPU-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
// IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
// IR-GPU-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
// IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
// IR-GPU-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
// IR-GPU-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
// IR-GPU-NEXT: br label [[IFCONT4:%.*]]
// IR-GPU: else3:
// IR-GPU: else4:
// IR-GPU-NEXT: br label [[IFCONT4]]
// IR-GPU: ifcont4:
// IR-GPU: ifcont5:
// IR-GPU-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
// IR-GPU-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// IR-GPU-NEXT: br label [[PRECOND]]
Expand All @@ -544,12 +545,12 @@ int foo() {
// IR-GPU-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
// IR-GPU-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
// IR-GPU-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
// IR-GPU-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
Expand Down Expand Up @@ -625,10 +626,9 @@ int foo() {
// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
// IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
Expand All @@ -644,6 +644,7 @@ int foo() {
// IR-GPU-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100
// IR-GPU-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
// IR-GPU: body:
// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]])
// IR-GPU-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// IR-GPU-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
Expand All @@ -658,21 +659,22 @@ int foo() {
// IR-GPU: else:
// IR-GPU-NEXT: br label [[IFCONT]]
// IR-GPU: ifcont:
// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]])
// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// IR-GPU-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
// IR-GPU-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// IR-GPU: then2:
// IR-GPU: then3:
// IR-GPU-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
// IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
// IR-GPU-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
// IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
// IR-GPU-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
// IR-GPU-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
// IR-GPU-NEXT: br label [[IFCONT4:%.*]]
// IR-GPU: else3:
// IR-GPU: else4:
// IR-GPU-NEXT: br label [[IFCONT4]]
// IR-GPU: ifcont4:
// IR-GPU: ifcont5:
// IR-GPU-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
// IR-GPU-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// IR-GPU-NEXT: br label [[PRECOND]]
Expand Down
6 changes: 6 additions & 0 deletions clang/test/SemaCXX/coroutine-vla.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ struct promise
void unhandled_exception();
};

// Test that a variable length array in a non-coroutine function is not diagnosed.
void bar(int n) {
int array[n];
return;
}

coroutine foo(int n) {
int array[n]; // expected-error {{variable length arrays in a coroutine are not supported}}
co_return;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -DUNION_TEST -verify %s

#ifdef UNION_TEST
Expand Down
31 changes: 23 additions & 8 deletions clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3789,36 +3789,51 @@ TEST(TransferTest, AddrOfReference) {
TEST(TransferTest, Preincrement) {
std::string Code = R"(
void target(int I) {
(void)0; // [[before]]
int &IRef = ++I;
// [[p]]
// [[after]]
}
)";
runDataflow(
Code,
[](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
ASTContext &ASTCtx) {
const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
const Environment &EnvBefore =
getEnvironmentAtAnnotation(Results, "before");
const Environment &EnvAfter =
getEnvironmentAtAnnotation(Results, "after");

EXPECT_EQ(&getLocForDecl(ASTCtx, Env, "IRef"),
&getLocForDecl(ASTCtx, Env, "I"));
EXPECT_EQ(&getLocForDecl(ASTCtx, EnvAfter, "IRef"),
&getLocForDecl(ASTCtx, EnvBefore, "I"));

const ValueDecl *IDecl = findValueDecl(ASTCtx, "I");
EXPECT_NE(EnvBefore.getValue(*IDecl), nullptr);
EXPECT_EQ(EnvAfter.getValue(*IDecl), nullptr);
});
}

TEST(TransferTest, Postincrement) {
std::string Code = R"(
void target(int I) {
(void)0; // [[before]]
int OldVal = I++;
// [[p]]
// [[after]]
}
)";
runDataflow(
Code,
[](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
ASTContext &ASTCtx) {
const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
const Environment &EnvBefore =
getEnvironmentAtAnnotation(Results, "before");
const Environment &EnvAfter =
getEnvironmentAtAnnotation(Results, "after");

EXPECT_EQ(&getValueForDecl(ASTCtx, EnvBefore, "I"),
&getValueForDecl(ASTCtx, EnvAfter, "OldVal"));

EXPECT_EQ(&getValueForDecl(ASTCtx, Env, "OldVal"),
&getValueForDecl(ASTCtx, Env, "I"));
const ValueDecl *IDecl = findValueDecl(ASTCtx, "I");
EXPECT_EQ(EnvAfter.getValue(*IDecl), nullptr);
});
}

Expand Down
2 changes: 2 additions & 0 deletions compiler-rt/include/profile/MIBEntryDef.inc
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)
29 changes: 27 additions & 2 deletions compiler-rt/include/profile/MemProfData.inc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)

// The version number of the raw binary format.
#define MEMPROF_RAW_VERSION 3ULL
#define MEMPROF_RAW_VERSION 4ULL

// Currently supported versions.
#define MEMPROF_RAW_SUPPORTED_VERSIONS \
{ 3ULL, 4ULL }

#define MEMPROF_BUILDID_MAX_SIZE 32ULL

Expand Down Expand Up @@ -119,7 +123,8 @@ MemInfoBlock() {
}

MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
uintptr_t Histogram, uint32_t HistogramSize)
: MemInfoBlock() {
AllocCount = 1U;
TotalAccessCount = AccessCount;
Expand Down Expand Up @@ -149,6 +154,8 @@ MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
AllocCpuId = AllocCpu;
DeallocCpuId = DeallocCpu;
NumMigratedCpu = AllocCpuId != DeallocCpuId;
AccessHistogramSize = HistogramSize;
AccessHistogram = Histogram;
}

void Merge(const MemInfoBlock &newMIB) {
Expand Down Expand Up @@ -194,6 +201,24 @@ void Merge(const MemInfoBlock &newMIB) {
NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
AllocCpuId = newMIB.AllocCpuId;
DeallocCpuId = newMIB.DeallocCpuId;

// For merging histograms, we always keep the longer histogram, and add
// values of shorter histogram to larger one.
uintptr_t ShorterHistogram;
uint32_t ShorterHistogramSize;
if (newMIB.AccessHistogramSize > AccessHistogramSize) {
ShorterHistogram = AccessHistogram;
ShorterHistogramSize = AccessHistogramSize;
// Swap histogram of current to larger histogram
AccessHistogram = newMIB.AccessHistogram;
AccessHistogramSize = newMIB.AccessHistogramSize;
} else {
ShorterHistogram = newMIB.AccessHistogram;
ShorterHistogramSize = newMIB.AccessHistogramSize;
}
for (size_t i = 0; i < ShorterHistogramSize; ++i) {
((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
}
}

#ifdef _MSC_VER
Expand Down
84 changes: 73 additions & 11 deletions compiler-rt/lib/memprof/memprof_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
#include <sched.h>
#include <time.h>

#define MAX_HISTOGRAM_PRINT_SIZE 32U

extern bool __memprof_histogram;

namespace __memprof {
namespace {
using ::llvm::memprof::MemInfoBlock;
Expand Down Expand Up @@ -68,6 +72,14 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
"cpu: %u, num same dealloc_cpu: %u\n",
M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
M.NumSameDeallocCpu);
Printf("AccessCountHistogram[%u]: ", M.AccessHistogramSize);
uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
? MAX_HISTOGRAM_PRINT_SIZE
: M.AccessHistogramSize;
for (size_t i = 0; i < PrintSize; ++i) {
Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
}
Printf("\n");
}
}
} // namespace
Expand Down Expand Up @@ -216,15 +228,34 @@ u64 GetShadowCount(uptr p, u32 size) {
return count;
}

// Sums the one-byte histogram shadow counters from the counter that shadows
// `p` through the counter that shadows `p + size` (both ends included).
// See memprof_mapping.h for an overview on histogram counters.
u64 GetShadowCountHistogram(uptr p, u32 size) {
  const u8 *const last = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + size);
  u64 total = 0;
  for (const u8 *cur = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p); cur <= last; ++cur)
    total += *cur;
  return total;
}

// Clears the shadow counters (when memory is allocated).
void ClearShadow(uptr addr, uptr size) {
CHECK(AddrIsAlignedByGranularity(addr));
CHECK(AddrIsInMem(addr));
CHECK(AddrIsAlignedByGranularity(addr + size));
CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
CHECK(REAL(memset));
uptr shadow_beg = MEM_TO_SHADOW(addr);
uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
uptr shadow_beg;
uptr shadow_end;
if (__memprof_histogram) {
shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr);
shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size);
} else {
shadow_beg = MEM_TO_SHADOW(addr);
shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
}

if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
} else {
Expand Down Expand Up @@ -279,6 +310,44 @@ struct Allocator {
Print(Value->mib, Key, bool(Arg));
}

// Builds the MemInfoBlock for a chunk, choosing the histogram or the
// non-histogram variant according to __memprof_histogram.
// See memprof_mapping.h for an overview on histogram counters.
static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
  return __memprof_histogram ? CreateNewMIBWithHistogram(p, m, user_size)
                             : CreateNewMIBWithoutHistogram(p, m, user_size);
}

// Builds a MemInfoBlock that carries a per-granule access histogram.
// One uint64_t bucket is allocated (via InternalAlloc) for every
// HISTOGRAM_GRANULARITY bytes of `user_size`, rounded up, and each bucket is
// filled from the matching one-byte shadow counter. The buffer's address and
// length are passed to the MemInfoBlock constructor.
static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
                                              u64 user_size) {
  const u64 access_count = GetShadowCountHistogram(p, user_size);
  const long dealloc_time = GetTimestamp();

  const uint32_t num_buckets =
      RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
  const size_t num_bytes = num_buckets * sizeof(uint64_t);
  uint64_t *buckets = (uint64_t *)InternalAlloc(num_bytes);
  memset((void *)buckets, 0, num_bytes);

  // Widen every one-byte shadow counter into its 64-bit bucket.
  for (size_t idx = 0; idx < num_buckets; ++idx) {
    const u8 *counter =
        (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * idx);
    buckets[idx] = (uint64_t)*counter;
  }

  return MemInfoBlock(user_size, access_count, m->timestamp_ms, dealloc_time,
                      m->cpu_id, GetCpuId(), (uintptr_t)buckets, num_buckets);
}

// Builds a MemInfoBlock from the regular shadow counters only; the histogram
// pointer and histogram size handed to the constructor are both zero.
static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
                                                 u64 user_size) {
  const u64 access_count = GetShadowCount(p, user_size);
  const long dealloc_time = GetTimestamp();
  return MemInfoBlock(user_size, access_count, m->timestamp_ms, dealloc_time,
                      m->cpu_id, GetCpuId(), /*Histogram=*/0,
                      /*HistogramSize=*/0);
}

void FinishAndWrite() {
if (print_text && common_flags()->print_module_map)
DumpProcessMap();
Expand Down Expand Up @@ -319,10 +388,7 @@ struct Allocator {
if (!m)
return;
uptr user_beg = ((uptr)m) + kChunkHeaderSize;
u64 c = GetShadowCount(user_beg, user_requested_size);
long curtime = GetTimestamp();
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
m->cpu_id, GetCpuId());
MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
},
this);
Expand Down Expand Up @@ -451,11 +517,7 @@ struct Allocator {
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
if (memprof_inited && atomic_load_relaxed(&constructed) &&
!atomic_load_relaxed(&destructing)) {
u64 c = GetShadowCount(p, user_requested_size);
long curtime = GetTimestamp();

MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
m->cpu_id, GetCpuId());
MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
}

Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/memprof/memprof_flags.inc
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
MEMPROF_FLAG(bool, print_text, false,
"If set, prints the heap profile in text format. Else use the raw binary serialization format.")
MEMPROF_FLAG(bool, print_terse, false,
"If set, prints memory profile in a terse format. Only applicable if print_text = true.")
"If set, prints memory profile in a terse format. Only applicable if print_text = true.")
37 changes: 36 additions & 1 deletion compiler-rt/lib/memprof/memprof_mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ static const u64 kDefaultShadowScale = 3;

#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
#define MEMPROF_ALIGNMENT 32

namespace __memprof {

extern uptr kHighMemEnd; // Initialized in __memprof_init.
Expand All @@ -37,6 +36,34 @@ extern uptr kHighMemEnd; // Initialized in __memprof_init.
#define MEM_TO_SHADOW(mem) \
((((mem) & SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))

// Histogram shadow memory is laid out differently from the standard configuration:

// 8 bytes
// +---+---+---+ +---+---+---+ +---+---+---+
// Memory | a | | b | | c |
// +---+---+---+ +---+---+---+ +---+---+---+

// +---+ +---+ +---+
// Shadow | a | | b | | c |
// +---+ +---+ +---+
// 1 byte
//
// There is one 1-byte counter for every 8 bytes of memory.
// HISTOGRAM_MEM_TO_SHADOW translates a memory address to the address of its
// corresponding shadow counter. The same data is still provided in the MIB
// whether histograms are used or not. The total access count per allocation
// is computed by summing up all individual 1-byte counters. This can incur an
// accuracy penalty.

// Number of application bytes covered by one histogram shadow counter.
#define HISTOGRAM_GRANULARITY 8U

// Largest value a one-byte histogram counter is allowed to reach.
#define HISTOGRAM_MAX_COUNTER 255U

// Mask that rounds an address down to a HISTOGRAM_GRANULARITY boundary.
// The subtraction is carried out in 64 bits on purpose: with 32-bit operands
// the complement is 0xFFFFFFF8, which is zero-extended when ANDed with a
// 64-bit address and would clear the upper 32 bits of that address.
#define HISTOGRAM_SHADOW_MASK (~(HISTOGRAM_GRANULARITY - 1ULL))

// Translates an application address to the address of its histogram counter.
#define HISTOGRAM_MEM_TO_SHADOW(mem)                                           \
  ((((mem) & HISTOGRAM_SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))

#define SHADOW_ENTRY_SIZE (MEM_GRANULARITY >> SHADOW_SCALE)

#define kLowMemBeg 0
Expand Down Expand Up @@ -108,6 +135,14 @@ inline void RecordAccess(uptr a) {
(*shadow_address)++;
}

// Increments the one-byte histogram counter that shadows address `a`, leaving
// it untouched once it has reached HISTOGRAM_MAX_COUNTER.
inline void RecordAccessHistogram(uptr a) {
  CHECK_EQ(SHADOW_ENTRY_SIZE, 8);
  u8 &counter = *(u8 *)HISTOGRAM_MEM_TO_SHADOW(a);
  if (counter >= HISTOGRAM_MAX_COUNTER)
    return;
  ++counter;
}

} // namespace __memprof

#endif // MEMPROF_MAPPING_H
11 changes: 11 additions & 0 deletions compiler-rt/lib/memprof/memprof_mibmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,18 @@ void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) {
} else {
LockedMemInfoBlock *lmib = *h;
SpinMutexLock lock(&lmib->mutex);
uintptr_t ShorterHistogram;
if (Block.AccessHistogramSize > lmib->mib.AccessHistogramSize)
ShorterHistogram = lmib->mib.AccessHistogram;
else
ShorterHistogram = Block.AccessHistogram;

lmib->mib.Merge(Block);
// The larger histogram is kept and the shorter histogram is discarded after
// adding its counters to the larger histogram. Free only the shorter
// histogram.
if (Block.AccessHistogramSize > 0 || lmib->mib.AccessHistogramSize > 0)
InternalFree((void *)ShorterHistogram);
}
}

Expand Down
53 changes: 45 additions & 8 deletions compiler-rt/lib/memprof/memprof_rawprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,24 +146,38 @@ void SerializeStackToBuffer(const Vector<u64> &StackIds,
// ---------- MIB Entry 0
// Alloc Count
// ...
// ---- AccessHistogram Entry 0
// ...
// ---- AccessHistogram Entry AccessHistogramSize - 1
// ---------- MIB Entry 1
// Alloc Count
// ...
// ---- AccessHistogram Entry 0
// ...
// ---- AccessHistogram Entry AccessHistogramSize - 1
// ----------
// Writes the MIB section into Buffer: the number of entries, then for each
// stack id in StackIds the id itself, the raw MemInfoBlock, and that block's
// AccessHistogramSize histogram entries. A non-empty histogram is released
// with InternalFree once it has been written.
void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
                              const u64 ExpectedNumBytes, char *&Buffer) {
  char *Cursor = Buffer;
  const u64 EntryCount = StackIds.Size();
  Cursor = WriteBytes(EntryCount, Cursor);

  for (u64 Idx = 0; Idx != EntryCount; ++Idx) {
    const u64 StackId = StackIds[Idx];
    MIBMapTy::Handle Entry(&MIBMap, StackId, /*remove=*/true,
                           /*create=*/false);
    CHECK(Entry.exists());
    Cursor = WriteBytes(StackId, Cursor);

    auto &MIB = (*Entry)->mib;
    // FIXME: We unnecessarily serialize the AccessHistogram pointer. Adding a
    // serialization schema will fix this issue. See also FIXME in
    // deserialization.
    Cursor = WriteBytes(MIB, Cursor);

    // The histogram entries follow their MemInfoBlock directly.
    u64 *Histogram = (u64 *)(MIB.AccessHistogram);
    for (u64 J = 0; J < MIB.AccessHistogramSize; ++J) {
      u64 Value = Histogram[J];
      Cursor = WriteBytes(Value, Cursor);
    }
    if (MIB.AccessHistogramSize > 0)
      InternalFree((void *)(MIB.AccessHistogram));
  }

  CHECK(ExpectedNumBytes >= static_cast<u64>(Cursor - Buffer) &&
        "Expected num bytes != actual bytes written");
}
Expand Down Expand Up @@ -192,7 +206,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
// ---------- MIB Entry
// Alloc Count
// ...
// ----------
// ---- AccessHistogram Entry 0
// ...
// ---- AccessHistogram Entry AccessHistogramSize - 1
// ---------- MIB Entry 1
// Alloc Count
// ...
// ---- AccessHistogram Entry 0
// ...
// ---- AccessHistogram Entry AccessHistogramSize - 1
// Optional Padding Bytes
// ---------- Stack Info
// Num Entries
Expand All @@ -218,13 +240,26 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
const u64 NumMIBInfoBytes = RoundUpTo(
sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)), 8);

// Get Number of AccessHistogram entries in total
u64 TotalAccessHistogramEntries = 0;
MIBMap.ForEach(
[](const uptr Key, UNUSED LockedMemInfoBlock *const &MIB, void *Arg) {
u64 *TotalAccessHistogramEntries = (u64 *)Arg;
*TotalAccessHistogramEntries += MIB->mib.AccessHistogramSize;
},
reinterpret_cast<void *>(&TotalAccessHistogramEntries));
const u64 NumHistogramBytes =
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint64_t), 8);

const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);

// Ensure that the profile is 8b aligned. We allow for some optional padding
// at the end so that any subsequent profile serialized to the same file does
// not incur unaligned accesses.
const u64 TotalSizeBytes = RoundUpTo(
sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes, 8);
const u64 TotalSizeBytes =
RoundUpTo(sizeof(Header) + NumSegmentBytes + NumStackBytes +
NumMIBInfoBytes + NumHistogramBytes,
8);

// Allocate the memory for the entire buffer incl. info blocks.
Buffer = (char *)InternalAlloc(TotalSizeBytes);
Expand All @@ -235,14 +270,16 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
static_cast<u64>(TotalSizeBytes),
sizeof(Header),
sizeof(Header) + NumSegmentBytes,
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes};
sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes +
NumHistogramBytes};
Ptr = WriteBytes(header, Ptr);

SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr);
Ptr += NumSegmentBytes;

SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr);
Ptr += NumMIBInfoBytes;
SerializeMIBInfoToBuffer(MIBMap, StackIds,
NumMIBInfoBytes + NumHistogramBytes, Ptr);
Ptr += NumMIBInfoBytes + NumHistogramBytes;

SerializeStackToBuffer(StackIds, NumStackBytes, Ptr);

Expand Down
23 changes: 23 additions & 0 deletions compiler-rt/lib/memprof/memprof_rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ uptr __memprof_shadow_memory_dynamic_address; // Global interface symbol.
// Allow the user to specify a profile output file via the binary.
SANITIZER_WEAK_ATTRIBUTE char __memprof_profile_filename[1];

// Share ClHistogram compiler flag with runtime.
SANITIZER_WEAK_ATTRIBUTE bool __memprof_histogram;

namespace __memprof {

static void MemprofDie() {
Expand Down Expand Up @@ -75,12 +78,23 @@ uptr kHighMemEnd;
// exported functions

#define MEMPROF_MEMORY_ACCESS_CALLBACK_BODY() __memprof::RecordAccess(addr);
#define MEMPROF_MEMORY_ACCESS_CALLBACK_BODY_HIST() \
__memprof::RecordAccessHistogram(addr);

#define MEMPROF_MEMORY_ACCESS_CALLBACK(type) \
extern "C" NOINLINE INTERFACE_ATTRIBUTE void __memprof_##type(uptr addr) { \
MEMPROF_MEMORY_ACCESS_CALLBACK_BODY() \
}

#define MEMPROF_MEMORY_ACCESS_CALLBACK_HIST(type) \
extern "C" NOINLINE INTERFACE_ATTRIBUTE void __memprof_hist_##type( \
uptr addr) { \
MEMPROF_MEMORY_ACCESS_CALLBACK_BODY_HIST() \
}

MEMPROF_MEMORY_ACCESS_CALLBACK_HIST(load)
MEMPROF_MEMORY_ACCESS_CALLBACK_HIST(store)

MEMPROF_MEMORY_ACCESS_CALLBACK(load)
MEMPROF_MEMORY_ACCESS_CALLBACK(store)

Expand Down Expand Up @@ -260,11 +274,20 @@ void __memprof_record_access(void const volatile *addr) {
__memprof::RecordAccess((uptr)addr);
}

void __memprof_record_access_hist(void const volatile *addr) {
__memprof::RecordAccessHistogram((uptr)addr);
}

void __memprof_record_access_range(void const volatile *addr, uptr size) {
for (uptr a = (uptr)addr; a < (uptr)addr + size; a += kWordSize)
__memprof::RecordAccess(a);
}

void __memprof_record_access_range_hist(void const volatile *addr, uptr size) {
for (uptr a = (uptr)addr; a < (uptr)addr + size; a += kWordSize)
__memprof::RecordAccessHistogram(a);
}

extern "C" SANITIZER_INTERFACE_ATTRIBUTE u16
__sanitizer_unaligned_load16(const uu16 *p) {
__memprof_record_access(p);
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/memprof/tests/rawprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ TEST(MemProf, Basic) {
// sizeof(MemInfoBlock) contains stack id + MeminfoBlock.
EXPECT_EQ(StackOffset - MIBOffset, 8 + 2 * (8 + sizeof(MemInfoBlock)));

EXPECT_EQ(StackOffset, 408ULL);
EXPECT_EQ(StackOffset, 432ULL);
// We expect 2 stack entries, with 5 frames - 8b for total count,
// 2 * (8b for id, 8b for frame count and 5*8b for fake frames).
// Since this is the last section, there may be additional padding at the end
Expand Down
7 changes: 3 additions & 4 deletions compiler-rt/lib/profile/InstrProfilingFile.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,9 @@ COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
#define WIN_SYM_PREFIX
#endif
#pragma comment( \
linker, \
"/alternatename:" WIN_SYM_PREFIX INSTR_PROF_QUOTE( \
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" WIN_SYM_PREFIX \
INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
linker, "/alternatename:" WIN_SYM_PREFIX INSTR_PROF_QUOTE( \
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" WIN_SYM_PREFIX \
INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
#else
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR
__attribute__((weak, alias(INSTR_PROF_QUOTE(
Expand Down
10 changes: 5 additions & 5 deletions compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profraw | FileCheck %s --check-prefixes=COMMON,RAW

// Generate indexed profile from raw profile and show the data.
// RUN: llvm-profdata merge %t-test.profraw -o %t-test.profdata
// RUN: llvm-profdata merge --keep-vtable-symbols %t-test.profraw -o %t-test.profdata
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED

// Generate text profile from raw and indexed profiles respectively and show the data.
// RUN: llvm-profdata merge --text %t-test.profraw -o %t-raw.proftext
// RUN: llvm-profdata merge --keep-vtable-symbols --text %t-test.profraw -o %t-raw.proftext
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-raw.proftext | FileCheck %s --check-prefix=ICTEXT
// RUN: llvm-profdata merge --text %t-test.profdata -o %t-indexed.proftext
// RUN: llvm-profdata merge --keep-vtable-symbols --text %t-test.profdata -o %t-indexed.proftext
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-indexed.proftext | FileCheck %s --check-prefix=ICTEXT

// Generate indexed profile from text profiles and show the data
// RUN: llvm-profdata merge --binary %t-raw.proftext -o %t-text.profraw
// RUN: llvm-profdata merge --keep-vtable-symbols --binary %t-raw.proftext -o %t-text.profraw
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
// RUN: llvm-profdata merge --binary %t-indexed.proftext -o %t-text.profdata
// RUN: llvm-profdata merge --keep-vtable-symbols --binary %t-indexed.proftext -o %t-text.profdata
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED

// COMMON: Counters:
Expand Down
2 changes: 1 addition & 1 deletion flang/include/flang/Lower/BoxAnalyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ struct ScalarDynamicDerived : ScalarSym {
: ScalarSym{sym}, lens{std::move(lens)} {}

private:
llvm::SmallVector<Fortran::lower::SomeExpr> lens;
llvm::SmallVector<Fortran::lower::SomeExpr, 1> lens;
};

struct LBoundsAndShape {
Expand Down
12 changes: 8 additions & 4 deletions libc/config/linux/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ set(TARGET_LIBC_ENTRYPOINTS
# errno.h entrypoints
libc.src.errno.errno

# setjmp.h entrypoints
libc.src.setjmp.longjmp
libc.src.setjmp.setjmp

# string.h entrypoints
libc.src.string.bcmp
libc.src.string.bcopy
Expand Down Expand Up @@ -185,6 +181,14 @@ set(TARGET_LIBC_ENTRYPOINTS

)

if(LLVM_LIBC_FULL_BUILD)
list(APPEND TARGET_LIBC_ENTRYPOINTS
# setjmp.h entrypoints
libc.src.setjmp.longjmp
libc.src.setjmp.setjmp
)
endif()

set(TARGET_LIBM_ENTRYPOINTS
# fenv.h entrypoints
libc.src.fenv.feclearexcept
Expand Down
2 changes: 2 additions & 0 deletions libc/docs/gpu/support.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ strtold |check|
strtoll |check|
strtoul |check|
strtoull |check|
srand |check|
rand |check|
============= ========= ============

inttypes.h
Expand Down
9 changes: 9 additions & 0 deletions libc/hdr/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,12 @@ add_proxy_header_library(
libc.include.llvm-libc-types.atexithandler_t
libc.include.stdlib
)

add_proxy_header_library(
struct_sigaction
HDRS
struct_sigaction.h
FULL_BUILD_DEPENDS
libc.include.llvm-libc-types.struct_sigaction
libc.include.signal
)
21 changes: 21 additions & 0 deletions libc/hdr/types/struct_sigaction.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Proxy for struct sigaction ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_SIGACTION_H
#define LLVM_LIBC_HDR_TYPES_STRUCT_SIGACTION_H

#ifdef LIBC_FULL_BUILD

#include "include/llvm-libc-types/struct_sigaction.h"

#else

#include <signal.h>

#endif // LIBC_FULL_BUILD

#endif // LLVM_LIBC_HDR_TYPES_STRUCT_SIGACTION_H
21 changes: 21 additions & 0 deletions libc/newhdrgen/class_implementation/classes/enumeration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env python
#
# ====-- Enumeration class for libc function headers ----------*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==-------------------------------------------------------------------------==#


class Enumeration:
    """A single enumerator, optionally carrying an explicit value.

    Attributes:
        name: Identifier of the enumerator.
        value: Value rendered after ``=``; ``None`` means "no explicit value".
    """

    def __init__(self, name, value=None):
        self.name = name
        self.value = value

    def __str__(self):
        """Render as ``NAME = VALUE``, or just ``NAME`` when no value is set."""
        # Identity check: `!= None` would be routed through a user-defined
        # __ne__/__eq__ on the value and could misreport a real value as unset.
        if self.value is not None:
            return f"{self.name} = {self.value}"
        return f"{self.name}"
29 changes: 29 additions & 0 deletions libc/newhdrgen/class_implementation/classes/function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python
#
# ====-- Function class for libc function headers -------------*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==-------------------------------------------------------------------------==#


class Function:
    """A C function declaration to be rendered into a generated header.

    Attributes:
        standards: Stored as given; not used when rendering.
        return_type: Return type text.
        name: Function name.
        arguments: List of argument type strings, taken from the ``"type"``
            entry of each argument mapping passed to the constructor.
        guard: Optional macro name; when truthy, the declaration is wrapped in
            ``#ifdef``/``#endif``.
        attributes: List of attribute strings, space-joined and appended
            directly after the closing parenthesis.
    """

    def __init__(
        self, standards, return_type, name, arguments, guard=None, attributes=None
    ):
        self.standards = standards
        self.return_type = return_type
        self.name = name
        self.arguments = [arg["type"] for arg in arguments]
        self.guard = guard
        # A `[]` default is evaluated once and would be shared by every
        # instance created without attributes; give each instance its own list.
        self.attributes = [] if attributes is None else attributes

    def __str__(self):
        """Return the declaration text, wrapped in the guard when one is set."""
        args_str = ", ".join(self.arguments)
        attributes_str = " ".join(self.attributes)
        result = f"{self.return_type} {self.name}({args_str}){attributes_str};"
        if self.guard:
            result = f"#ifdef {self.guard}\n{result}\n#endif // {self.guard}"
        return result
17 changes: 17 additions & 0 deletions libc/newhdrgen/class_implementation/classes/include.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python
#
# ====-- Include class for libc function headers --------------*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==-------------------------------------------------------------------------==#


class Include:
    """An ``#include`` directive naming a header in double quotes."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        """Return the directive text for the stored header name."""
        return '#include "{}"'.format(self.name)
21 changes: 21 additions & 0 deletions libc/newhdrgen/class_implementation/classes/macro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env python
#
# ====-- Macro class for libc function headers ----------------*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==-------------------------------------------------------------------------==#


class Macro:
    """A ``#define`` directive, optionally with a replacement value.

    Attributes:
        name: Macro identifier.
        value: Replacement text; ``None`` means the macro is defined empty.
    """

    def __init__(self, name, value=None):
        self.name = name
        self.value = value

    def __str__(self):
        """Render as ``#define NAME VALUE``, or ``#define NAME`` without a value."""
        # Identity check: `!= None` would be routed through a user-defined
        # __ne__/__eq__ on the value and could misreport a real value as unset.
        if self.value is not None:
            return f"#define {self.name} {self.value}"
        return f"#define {self.name}"
18 changes: 18 additions & 0 deletions libc/newhdrgen/class_implementation/classes/object.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env python
#
# ====-- Object class for libc function headers ---------------*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==-------------------------------------------------------------------------==#


class Object:
    """An ``extern`` object declaration made of a type and a name."""

    def __init__(self, name, type):
        self.name = name
        self.type = type

    def __str__(self):
        """Return ``extern TYPE NAME`` (no trailing semicolon is added)."""
        return "extern {} {}".format(self.type, self.name)
17 changes: 17 additions & 0 deletions libc/newhdrgen/class_implementation/classes/type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python
#
# ====-- Type class for libc function headers -----------------*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==-------------------------------------------------------------------------==#


class Type:
    """A type whose definition is pulled in from ``llvm-libc-types``."""

    def __init__(self, type_name):
        self.type_name = type_name

    def __str__(self):
        """Return the angle-bracket include line for this type's header."""
        return "#include <llvm-libc-types/{}.h>".format(self.type_name)
10 changes: 3 additions & 7 deletions libc/src/__support/OSUtil/linux/fcntl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,11 @@ int fcntl(int fd, int cmd, void *arg) {
}
case F_GETOWN: {
struct f_owner_ex fex;
int retVal =
int ret =
LIBC_NAMESPACE::syscall_impl<int>(SYS_fcntl, fd, F_GETOWN_EX, &fex);
if (retVal == -EINVAL)
return LIBC_NAMESPACE::syscall_impl<int>(SYS_fcntl, fd, cmd,
reinterpret_cast<void *>(arg));
if (static_cast<unsigned long>(retVal) <= -4096UL)
if (ret >= 0)
return fex.type == F_OWNER_PGRP ? -fex.pid : fex.pid;

libc_errno = -retVal;
libc_errno = -ret;
return -1;
}
// The general case
Expand Down
5 changes: 5 additions & 0 deletions libc/src/__support/fixedvector.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ template <typename T, size_t CAPACITY> class FixedVector {

// Mutable iteration. begin() forwards to the backing store's begin(); end()
// is an iterator to the slot at index item_count (presumably one past the
// last element in use -- confirm against the rest of the class).
LIBC_INLINE constexpr iterator begin() { return store.begin(); }
LIBC_INLINE constexpr iterator end() { return iterator{&store[item_count]}; }

// Const overloads of begin()/end() so that a const FixedVector can be
// iterated. end() points at the slot at index item_count.
LIBC_INLINE constexpr const_iterator begin() const { return store.begin(); }
LIBC_INLINE constexpr const_iterator end() const {
return const_iterator{&store[item_count]};
}
};

} // namespace LIBC_NAMESPACE
Expand Down
3 changes: 2 additions & 1 deletion libc/src/signal/linux/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ add_entrypoint_object(
DEPENDS
.__restore
libc.hdr.types.sigset_t
libc.hdr.types.struct_sigaction
libc.include.sys_syscall
libc.src.__support.OSUtil.osutil
libc.src.errno.errno
Expand Down Expand Up @@ -124,7 +125,7 @@ add_entrypoint_object(
../signal.h
DEPENDS
.sigaction
libc.include.signal
libc.hdr.signal_macros
)

add_entrypoint_object(
Expand Down
6 changes: 2 additions & 4 deletions libc/src/signal/linux/signal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@
//===----------------------------------------------------------------------===//

#include "src/signal/signal.h"
#include "src/signal/sigaction.h"

#include "hdr/signal_macros.h"
#include "src/__support/common.h"

#include <signal.h>
#include "src/signal/sigaction.h"

namespace LIBC_NAMESPACE {

Expand Down
2 changes: 1 addition & 1 deletion libc/src/signal/sigaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef LLVM_LIBC_SRC_SIGNAL_SIGACTION_H
#define LLVM_LIBC_SRC_SIGNAL_SIGACTION_H

#include <signal.h>
#include "hdr/types/struct_sigaction.h"

namespace LIBC_NAMESPACE {

Expand Down
1 change: 1 addition & 0 deletions libc/src/stdlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ add_entrypoint_object(
DEPENDS
.rand_util
libc.include.stdlib
libc.src.__support.threads.sleep
)

add_entrypoint_object(
Expand Down
24 changes: 18 additions & 6 deletions libc/src/stdlib/rand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,31 @@

#include "src/stdlib/rand.h"
#include "src/__support/common.h"
#include "src/__support/threads/sleep.h"
#include "src/stdlib/rand_util.h"

namespace LIBC_NAMESPACE {

// Silence warnings on targets with slow atomics.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Watomic-alignment"

// An implementation of the xorshift64star pseudo random number generator. This
// is a good general purpose generator for most non-cryptographic applications.
LLVM_LIBC_FUNCTION(int, rand, (void)) {
unsigned long x = rand_next;
x ^= x >> 12;
x ^= x << 25;
x ^= x >> 27;
rand_next = x;
return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
unsigned long orig = rand_next.load(cpp::MemoryOrder::RELAXED);
for (;;) {
unsigned long x = orig;
x ^= x >> 12;
x ^= x << 25;
x ^= x >> 27;
if (rand_next.compare_exchange_strong(orig, x, cpp::MemoryOrder::ACQUIRE,
cpp::MemoryOrder::RELAXED))
return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
sleep_briefly();
}
}

#pragma GCC diagnostic pop

} // namespace LIBC_NAMESPACE
13 changes: 4 additions & 9 deletions libc/src/stdlib/rand_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,13 @@
//===----------------------------------------------------------------------===//

#include "src/stdlib/rand_util.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/macros/attributes.h"

namespace LIBC_NAMESPACE {

#ifdef LIBC_TARGET_ARCH_IS_GPU
// FIXME: Local GPU memory cannot be initialized so we cannot currently provide
// a standard compliant default value.
ThreadLocal<unsigned long> rand_next;
#else
// C standard 7.10p2: If 'rand' is called before 'srand' it is to proceed as if
// the 'srand' function was called with a value of '1'.
LIBC_THREAD_LOCAL unsigned long rand_next = 1;
#endif
// C standard 7.10p2: If 'rand' is called before 'srand' it is to
// proceed as if the 'srand' function was called with a value of '1'.
cpp::Atomic<unsigned long> rand_next = 1;

} // namespace LIBC_NAMESPACE
28 changes: 5 additions & 23 deletions libc/src/stdlib/rand_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,15 @@
#ifndef LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
#define LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H

#include "src/__support/GPU/utils.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/macros/attributes.h"

namespace LIBC_NAMESPACE {

#ifdef LIBC_TARGET_ARCH_IS_GPU
// Implement thread local storage on the GPU using local memory. Each thread
// gets its slot in the local memory array and is private to the group.
// TODO: We need to implement the 'thread_local' keyword on the GPU. This is an
// inefficient and incomplete stand-in until that is done.
template <typename T> class ThreadLocal {
private:
static constexpr long MAX_THREADS = 1024;
[[clang::loader_uninitialized]] static inline gpu::Local<T>
storage[MAX_THREADS];

public:
LIBC_INLINE operator T() const { return storage[gpu::get_thread_id()]; }
LIBC_INLINE void operator=(const T &value) {
storage[gpu::get_thread_id()] = value;
}
};

extern ThreadLocal<unsigned long> rand_next;
#else
extern LIBC_THREAD_LOCAL unsigned long rand_next;
#endif
// The ISO C standard does not explicitly require thread-safe behavior for the
// generic `rand()` function. Some implementations expect it however, so we
// provide it here.
extern cpp::Atomic<unsigned long> rand_next;

} // namespace LIBC_NAMESPACE

Expand Down
10 changes: 9 additions & 1 deletion libc/src/stdlib/srand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@

namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(void, srand, (unsigned int seed)) { rand_next = seed; }
// Silence warnings on targets with slow atomics.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Watomic-alignment"

// Seeds the generator: writes `seed` into the atomic `rand_next` using a
// relaxed-order store.
LLVM_LIBC_FUNCTION(void, srand, (unsigned int seed)) {
rand_next.store(seed, cpp::MemoryOrder::RELAXED);
}

#pragma GCC diagnostic pop

} // namespace LIBC_NAMESPACE
10 changes: 10 additions & 0 deletions libc/test/src/__support/fixedvector_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,13 @@ TEST(LlvmLibcFixedVectorTest, ForwardIteration) {
ASSERT_EQ(*it, arr[idx]);
}
}

// A const FixedVector built from an array must yield the same elements, in
// order, when walked through its const begin()/end().
TEST(LlvmLibcFixedVectorTest, ConstForwardIteration) {
  const LIBC_NAMESPACE::cpp::array<int, 4> expected{1, 2, 3, 4};
  const LIBC_NAMESPACE::FixedVector<int, 5> fixed(expected.begin(),
                                                  expected.end());
  ASSERT_EQ(fixed.size(), expected.size());
  const auto first = fixed.begin();
  for (auto cur = first; cur != fixed.end(); ++cur) {
    // Distance from the start gives the matching index in the source array.
    ASSERT_EQ(*cur, expected[cur - first]);
  }
}
1 change: 1 addition & 0 deletions libc/test/src/fcntl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ add_libc_unittest(
libc.src.fcntl.fcntl
libc.src.fcntl.open
libc.src.unistd.close
libc.src.unistd.getpid
libc.hdr.types.struct_flock
libc.hdr.fcntl_macros
libc.test.UnitTest.ErrnoSetterMatcher
Expand Down
20 changes: 20 additions & 0 deletions libc/test/src/fcntl/fcntl_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "src/fcntl/fcntl.h"
#include "src/fcntl/open.h"
#include "src/unistd/close.h"
#include "src/unistd/getpid.h"
#include "test/UnitTest/ErrnoSetterMatcher.h"
#include "test/UnitTest/Test.h"

Expand Down Expand Up @@ -163,3 +164,22 @@ TEST(LlvmLibcFcntlTest, UseAfterClose) {
ASSERT_EQ(-1, LIBC_NAMESPACE::fcntl(fd, F_GETFL));
ASSERT_ERRNO_EQ(EBADF);
}

// Sets the owner of a freshly opened file to the current process with
// F_SETOWN and checks that F_GETOWN reports the same pid back.
TEST(LlvmLibcFcntlTest, SetGetOwnerTest) {
  using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
  LIBC_NAMESPACE::libc_errno = 0;

  pid_t self_pid = LIBC_NAMESPACE::getpid();
  ASSERT_GT(self_pid, -1);

  constexpr const char *TEST_FILE_NAME = "testdata/fcntl_set_get_owner.test";
  auto test_path = libc_make_test_file_path(TEST_FILE_NAME);
  int fd =
      LIBC_NAMESPACE::open(test_path, O_CREAT | O_TRUNC | O_RDWR, S_IRWXU);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_GT(fd, 0);

  // Make this process the owner of the descriptor.
  int set_result = LIBC_NAMESPACE::fcntl(fd, F_SETOWN, self_pid);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_GT(set_result, -1);

  // Reading the owner back must give the pid that was just set.
  int owner = LIBC_NAMESPACE::fcntl(fd, F_GETOWN);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(owner, self_pid);

  ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0));
}
6 changes: 3 additions & 3 deletions libc/test/src/signal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ add_libc_unittest(
SRCS
sigaction_test.cpp
DEPENDS
libc.include.errno
libc.include.signal
libc.hdr.errno_macros
libc.hdr.signal_macros
libc.src.signal.raise
libc.src.signal.sigaction
libc.test.UnitTest.ErrnoSetterMatcher
Expand Down Expand Up @@ -119,7 +119,7 @@ add_libc_unittest(
SRCS
sigaltstack_test.cpp
DEPENDS
libc.include.signal
libc.hdr.signal_macros
libc.src.errno.errno
libc.src.signal.raise
libc.src.signal.sigaltstack
Expand Down
6 changes: 2 additions & 4 deletions libc/test/src/signal/sigaction_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@
//
//===----------------------------------------------------------------------===//

#include "hdr/errno_macros.h"
#include "hdr/signal_macros.h"
#include "src/signal/raise.h"
#include "src/signal/sigaction.h"

#include "test/UnitTest/ErrnoSetterMatcher.h"
#include "test/UnitTest/Test.h"

#include <errno.h>
#include <signal.h>

using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;

Expand Down
3 changes: 1 addition & 2 deletions libc/test/src/signal/sigaltstack_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,16 @@
//
//===----------------------------------------------------------------------===//

#include "hdr/signal_macros.h"
#include "src/__support/OSUtil/syscall.h" // For internal syscall function.
#include "src/errno/libc_errno.h"
#include "src/signal/linux/signal_utils.h"
#include "src/signal/raise.h"
#include "src/signal/sigaction.h"
#include "src/signal/sigaltstack.h"

#include "test/UnitTest/ErrnoSetterMatcher.h"
#include "test/UnitTest/Test.h"

#include <signal.h>
#include <stdint.h>
#include <sys/syscall.h>

Expand Down
3 changes: 0 additions & 3 deletions libc/test/src/stdlib/rand_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,12 @@ TEST(LlvmLibcRandTest, UnsetSeed) {
vals[i] = val;
}

// FIXME: The GPU implementation cannot initialize the seed correctly.
#ifndef LIBC_TARGET_ARCH_IS_GPU
// The C standard specifies that if 'srand' is never called it should behave
// as if 'srand' was called with a value of 1. If we seed the value with 1 we
// should get the same sequence as the unseeded version.
LIBC_NAMESPACE::srand(1);
for (size_t i = 0; i < 1000; ++i)
ASSERT_EQ(LIBC_NAMESPACE::rand(), vals[i]);
#endif
}

TEST(LlvmLibcRandTest, SetSeed) {
Expand Down
2 changes: 1 addition & 1 deletion libcxx/docs/Status/Cxx20Issues.csv
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@
"`3200 <https://wg21.link/LWG3200>`__","``midpoint``\ should not constrain ``T``\ is complete","Prague","|Nothing To Do|",""
"`3201 <https://wg21.link/LWG3201>`__","``lerp``\ should be marked as ``noexcept``\ ","Prague","|Complete|",""
"`3226 <https://wg21.link/LWG3226>`__","``zoned_time``\ constructor from ``string_view``\ should accept ``zoned_time<Duration2, TimeZonePtr2>``\ ","Prague","","","|chrono|"
"`3233 <https://wg21.link/LWG3233>`__","Broken requirements for ``shared_ptr``\ converting constructors","Prague","",""
"`3233 <https://wg21.link/LWG3233>`__","Broken requirements for ``shared_ptr``\ converting constructors","Prague","|Complete|","19.0"
"`3237 <https://wg21.link/LWG3237>`__","LWG 3038 and 3190 have inconsistent PRs","Prague","|Complete|","16.0"
"`3238 <https://wg21.link/LWG3238>`__","Insufficiently-defined behavior of ``std::function``\ deduction guides","Prague","|Nothing To Do|",""
"`3242 <https://wg21.link/LWG3242>`__","``std::format``\ : missing rules for ``arg-id``\ in ``width``\ and ``precision``\ ","Prague","|Complete|","14.0","|format|"
Expand Down
4 changes: 3 additions & 1 deletion libcxx/include/__expected/expected.h
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,9 @@ class expected : private __expected_base<_Tp, _Err> {
_And< is_constructible<_Tp, _UfQual>,
is_constructible<_Err, _OtherErrQual>,
_If<_Not<is_same<remove_cv_t<_Tp>, bool>>::value,
_And< _Not<is_constructible<_Tp, expected<_Up, _OtherErr>&>>,
_And<
_Not<_And<is_same<_Tp, _Up>, is_same<_Err, _OtherErr>>>, // use the copy constructor instead, see #92676
_Not<is_constructible<_Tp, expected<_Up, _OtherErr>&>>,
_Not<is_constructible<_Tp, expected<_Up, _OtherErr>>>,
_Not<is_constructible<_Tp, const expected<_Up, _OtherErr>&>>,
_Not<is_constructible<_Tp, const expected<_Up, _OtherErr>>>,
Expand Down
18 changes: 16 additions & 2 deletions libcxx/include/__memory/shared_ptr.h
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,9 @@ struct __shared_ptr_deleter_ctor_reqs {
__well_formed_deleter<_Dp, _Yp*>::value;
};

// Constraint for constructing a shared_ptr from nullptr_t plus a deleter:
// the deleter type _Dp must be move-constructible and must satisfy
// __well_formed_deleter<_Dp, nullptr_t>.
template <class _Dp>
using __shared_ptr_nullptr_deleter_ctor_reqs = _And<is_move_constructible<_Dp>, __well_formed_deleter<_Dp, nullptr_t> >;

#if defined(_LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI)
# define _LIBCPP_SHARED_PTR_TRIVIAL_ABI __attribute__((__trivial_abi__))
#else
Expand All @@ -411,6 +414,8 @@ struct __shared_ptr_deleter_ctor_reqs {

template <class _Tp>
class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS shared_ptr {
// Empty tag type; serves as the result type of the enable_if constraint on the
// nullptr_t + deleter constructors (their defaulted trailing parameter).
struct __nullptr_sfinae_tag {};

public:
#if _LIBCPP_STD_VER >= 17
typedef weak_ptr<_Tp> weak_type;
Expand Down Expand Up @@ -503,7 +508,11 @@ class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS shared_ptr {
}

template <class _Dp>
_LIBCPP_HIDE_FROM_ABI shared_ptr(nullptr_t __p, _Dp __d) : __ptr_(nullptr) {
_LIBCPP_HIDE_FROM_ABI shared_ptr(
nullptr_t __p,
_Dp __d,
__enable_if_t<__shared_ptr_nullptr_deleter_ctor_reqs<_Dp>::value, __nullptr_sfinae_tag> = __nullptr_sfinae_tag())
: __ptr_(nullptr) {
#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
try {
#endif // _LIBCPP_HAS_NO_EXCEPTIONS
Expand All @@ -523,7 +532,12 @@ class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS shared_ptr {
}

template <class _Dp, class _Alloc>
_LIBCPP_HIDE_FROM_ABI shared_ptr(nullptr_t __p, _Dp __d, _Alloc __a) : __ptr_(nullptr) {
_LIBCPP_HIDE_FROM_ABI shared_ptr(
nullptr_t __p,
_Dp __d,
_Alloc __a,
__enable_if_t<__shared_ptr_nullptr_deleter_ctor_reqs<_Dp>::value, __nullptr_sfinae_tag> = __nullptr_sfinae_tag())
: __ptr_(nullptr) {
#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
try {
#endif // _LIBCPP_HAS_NO_EXCEPTIONS
Expand Down
5 changes: 2 additions & 3 deletions libcxx/include/__ranges/iota_view.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include <__concepts/semiregular.h>
#include <__concepts/totally_ordered.h>
#include <__config>
#include <__functional/ranges_operations.h>
#include <__iterator/concepts.h>
#include <__iterator/incrementable_traits.h>
#include <__iterator/iterator_traits.h>
Expand Down Expand Up @@ -313,8 +312,8 @@ class iota_view : public view_interface<iota_view<_Start, _BoundSentinel>> {
: __value_(std::move(__value)), __bound_sentinel_(std::move(__bound_sentinel)) {
// Validate the precondition if possible.
if constexpr (totally_ordered_with<_Start, _BoundSentinel>) {
_LIBCPP_ASSERT_UNCATEGORIZED(
ranges::less_equal()(__value_, __bound_sentinel_), "Precondition violated: value is greater than bound.");
_LIBCPP_ASSERT_VALID_INPUT_RANGE(
bool(__value_ <= __bound_sentinel_), "iota_view: bound must be reachable from value");
}
}

Expand Down
16 changes: 5 additions & 11 deletions libcxx/include/__utility/pair.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
#include <__fwd/array.h>
#include <__fwd/pair.h>
#include <__fwd/tuple.h>
#include <__tuple/sfinae_helpers.h>
#include <__tuple/tuple_element.h>
#include <__tuple/tuple_indices.h>
#include <__tuple/tuple_like_no_subrange.h>
#include <__tuple/tuple_size.h>
Expand Down Expand Up @@ -130,19 +128,15 @@ struct _LIBCPP_TEMPLATE_VIS pair
}
};

template <bool _MaybeEnable>
using _CheckArgsDep _LIBCPP_NODEBUG =
typename conditional< _MaybeEnable, _CheckArgs, __check_tuple_constructor_fail>::type;

template <bool _Dummy = true, __enable_if_t<_CheckArgsDep<_Dummy>::__enable_default(), int> = 0>
explicit(!_CheckArgsDep<_Dummy>::__enable_implicit_default()) _LIBCPP_HIDE_FROM_ABI constexpr pair() noexcept(
template <bool _Dummy = true, __enable_if_t<_Dummy && _CheckArgs::__enable_default(), int> = 0>
explicit(!_CheckArgs::__enable_implicit_default()) _LIBCPP_HIDE_FROM_ABI constexpr pair() noexcept(
is_nothrow_default_constructible<first_type>::value && is_nothrow_default_constructible<second_type>::value)
: first(), second() {}

template <bool _Dummy = true,
__enable_if_t<_CheckArgsDep<_Dummy>::template __is_pair_constructible<_T1 const&, _T2 const&>(), int> = 0>
template <bool _Dummy = true,
__enable_if_t<_Dummy && _CheckArgs::template __is_pair_constructible<_T1 const&, _T2 const&>(), int> = 0>
_LIBCPP_HIDE_FROM_ABI
_LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(!_CheckArgsDep<_Dummy>::template __is_implicit<_T1 const&, _T2 const&>())
_LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(!_CheckArgs::template __is_implicit<_T1 const&, _T2 const&>())
pair(_T1 const& __t1, _T2 const& __t2) noexcept(is_nothrow_copy_constructible<first_type>::value &&
is_nothrow_copy_constructible<second_type>::value)
: first(__t1), second(__t2) {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17

// REQUIRES: has-unix-headers
// UNSUPPORTED: libcpp-hardening-mode=none
// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing

// Test the precondition check in iota_view(value, bound) that `bound` is reachable from `value`.

#include <ranges>

#include "check_assertion.h"

// Both constructions pass a value greater than the bound, so each is expected
// to trip the iota_view(value, bound) precondition assertion with the quoted
// message.
int main(int, char**) {
{ TEST_LIBCPP_ASSERT_FAILURE(std::ranges::iota_view(5, 0), "iota_view: bound must be reachable from value"); }
{ TEST_LIBCPP_ASSERT_FAILURE(std::ranges::iota_view(10, 5), "iota_view: bound must be reachable from value"); }

return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ static_assert(!std::is_trivially_copy_constructible_v<std::expected<CopyableNonT
static_assert(!std::is_trivially_copy_constructible_v<std::expected<int, CopyableNonTrivial>>);
static_assert(!std::is_trivially_copy_constructible_v<std::expected<CopyableNonTrivial, CopyableNonTrivial>>);

// Test type that is default- and copy-constructible, and additionally
// constructible from any argument whose decayed type is copy-constructible and
// is not Any itself. Used as the value type of std::expected in the copy test
// for https://github.com/llvm/llvm-project/issues/92676.
struct Any {
constexpr Any() = default;
constexpr Any(const Any&) = default;
constexpr Any& operator=(const Any&) = default;

// Converting constructor; the requires-clause excludes Any so that it does
// not compete with the copy constructor declared above.
template <class T>
requires(!std::is_same_v<Any, std::decay_t<T>> && std::is_copy_constructible_v<std::decay_t<T>>)
constexpr Any(T&&) {}
};

constexpr bool test() {
// copy the value non-trivial
{
Expand Down Expand Up @@ -109,6 +119,16 @@ constexpr bool test() {
assert(!e2.has_value());
}

{
// TODO(LLVM 20): Remove once we drop support for Clang 17
#if defined(TEST_CLANG_VER) && TEST_CLANG_VER >= 1800
// https://github.com/llvm/llvm-project/issues/92676
std::expected<Any, int> e1;
auto e2 = e1;
assert(e2.has_value());
#endif
}

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "test_macros.h"
#include "deleter_types.h"

#include "types.h"
struct A
{
static int count;
Expand All @@ -28,6 +29,25 @@ struct A

int A::count = 0;

// LWG 3233. Broken requirements for shared_ptr converting constructors
// https://cplusplus.github.io/LWG/issue3233
static_assert( std::is_constructible<std::shared_ptr<int>, std::nullptr_t, test_deleter<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int>, std::nullptr_t, bad_deleter>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int>, std::nullptr_t, no_nullptr_deleter>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int>, std::nullptr_t, no_move_deleter>::value, "");

#if TEST_STD_VER >= 17
static_assert( std::is_constructible<std::shared_ptr<int[]>, std::nullptr_t, test_deleter<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, std::nullptr_t, bad_deleter>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, std::nullptr_t, no_nullptr_deleter>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, std::nullptr_t, no_move_deleter>::value, "");

static_assert( std::is_constructible<std::shared_ptr<int[5]>, std::nullptr_t, test_deleter<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, std::nullptr_t, bad_deleter>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, std::nullptr_t, no_nullptr_deleter>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, std::nullptr_t, no_move_deleter>::value, "");
#endif

int main(int, char**)
{
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "test_allocator.h"
#include "min_allocator.h"

#include "types.h"

struct A
{
static int count;
Expand All @@ -28,6 +30,25 @@ struct A

int A::count = 0;

// LWG 3233. Broken requirements for shared_ptr converting constructors
// https://cplusplus.github.io/LWG/issue3233
static_assert( std::is_constructible<std::shared_ptr<int>, std::nullptr_t, test_deleter<int>, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int>, std::nullptr_t, bad_deleter, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int>, std::nullptr_t, no_nullptr_deleter, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int>, std::nullptr_t, no_move_deleter, test_allocator<int> >::value, "");

#if TEST_STD_VER >= 17
static_assert( std::is_constructible<std::shared_ptr<int[]>, std::nullptr_t, test_deleter<int>, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, std::nullptr_t, bad_deleter, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, std::nullptr_t, no_nullptr_deleter, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, std::nullptr_t, no_move_deleter, test_allocator<int> >::value, "");

static_assert( std::is_constructible<std::shared_ptr<int[5]>, std::nullptr_t, test_deleter<int>, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, std::nullptr_t, bad_deleter, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, std::nullptr_t, no_nullptr_deleter, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, std::nullptr_t, no_move_deleter, test_allocator<int> >::value, "");
#endif

int main(int, char**)
{
test_allocator_statistics alloc_stats;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ struct A

int A::count = 0;

struct Derived : A {};
struct derived : A {};

// https://llvm.org/PR60258
// Invalid constructor SFINAE for std::shared_ptr's array ctors
static_assert( std::is_constructible<std::shared_ptr<int>, int*>::value, "");
static_assert( std::is_constructible<std::shared_ptr<A>, Derived*>::value, "");
static_assert( std::is_constructible<std::shared_ptr<A>, derived*>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<A>, int*>::value, "");

#if TEST_STD_VER >= 17
Expand Down Expand Up @@ -99,7 +99,7 @@ int main(int, char**)

{
assert(A::count == 0);
std::shared_ptr<A> pA(new Derived);
std::shared_ptr<A> pA(new derived);
assert(pA.use_count() == 1);
assert(A::count == 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "test_macros.h"
#include "deleter_types.h"

#include "types.h"

struct A
{
static int count;
Expand All @@ -28,52 +30,22 @@ struct A

int A::count = 0;

struct bad_ty { };

struct bad_deleter
{
void operator()(bad_ty) { }
};

struct no_move_deleter
{
no_move_deleter(no_move_deleter const&) = delete;
no_move_deleter(no_move_deleter &&) = delete;
void operator()(int*) { }
};

static_assert(!std::is_move_constructible<no_move_deleter>::value, "");

struct Base { };
struct Derived : Base { };

template<class T>
class MoveDeleter
{
MoveDeleter();
MoveDeleter(MoveDeleter const&);
public:
MoveDeleter(MoveDeleter&&) {}

explicit MoveDeleter(int) {}

void operator()(T* ptr) { delete ptr; }
};

// LWG 3233. Broken requirements for shared_ptr converting constructors
// https://cplusplus.github.io/LWG/issue3233
// https://llvm.org/PR60258
// Invalid constructor SFINAE for std::shared_ptr's array ctors
static_assert( std::is_constructible<std::shared_ptr<int>, int*, test_deleter<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int>, int*, bad_deleter>::value, "");
static_assert( std::is_constructible<std::shared_ptr<Base>, Derived*, test_deleter<Base> >::value, "");
static_assert( std::is_constructible<std::shared_ptr<base>, derived*, test_deleter<base> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<A>, int*, test_deleter<A> >::value, "");

#if TEST_STD_VER >= 17
static_assert( std::is_constructible<std::shared_ptr<int[]>, int*, test_deleter<int>>::value, "");
static_assert( std::is_constructible<std::shared_ptr<int[]>, int*, test_deleter<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, int*, bad_deleter>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, int(*)[], test_deleter<int>>::value, "");
static_assert( std::is_constructible<std::shared_ptr<int[5]>, int*, test_deleter<int>>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, int(*)[], test_deleter<int> >::value, "");
static_assert( std::is_constructible<std::shared_ptr<int[5]>, int*, test_deleter<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, int*, bad_deleter>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, int(*)[5], test_deleter<int>>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, int(*)[5], test_deleter<int> >::value, "");
#endif

int f() { return 5; }
Expand Down Expand Up @@ -132,17 +104,36 @@ int main(int, char**)

// Make sure that we can construct a shared_ptr where the element type and pointer type
// aren't "convertible" but are "compatible".
static_assert(!std::is_constructible<std::shared_ptr<Derived[4]>, Base[4], test_deleter<Derived[4]> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<derived[4]>, base[4], test_deleter<derived[4]> >::value, "");
}

#if TEST_STD_VER >= 11
{
MoveDeleter<int> d(0);
move_deleter<int> d(0);
std::shared_ptr<int> p0(new int, std::move(d));
std::shared_ptr<int> p1(nullptr, std::move(d));
}
#endif // TEST_STD_VER >= 11

#if TEST_STD_VER >= 14
{
// LWG 4110
auto deleter = [](auto pointer) { delete pointer; };
std::shared_ptr<int> p(new int, deleter);
}

{
std::shared_ptr<int> p(NULL, [](auto){});
}
#endif

#if TEST_STD_VER >= 17
{
// See https://github.com/llvm/llvm-project/pull/93071#issuecomment-2166047398
std::shared_ptr<char[]> a(new char[10], std::default_delete<char[]>());
}
#endif

test_function_type();
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "test_allocator.h"
#include "min_allocator.h"

#include "types.h"
struct A
{
static int count;
Expand All @@ -28,52 +29,22 @@ struct A

int A::count = 0;

// Empty tag type; nothing converts to it implicitly.
struct bad_ty {};

// Deleter whose call operator only accepts a bad_ty by value, so it cannot be
// invoked with an ordinary object pointer such as int*.
struct bad_deleter {
  void operator()(bad_ty) {}
};

// Deleter that can be neither copied nor moved: both constructors are
// explicitly deleted. Its call operator accepts an int* and does nothing.
struct no_move_deleter {
  void operator()(int*) {}

  no_move_deleter(no_move_deleter&&)      = delete;
  no_move_deleter(const no_move_deleter&) = delete;
};

// Guard against the type accidentally becoming move-constructible.
static_assert(!std::is_move_constructible<no_move_deleter>::value, "");

// Minimal public single-inheritance pair: Derived* converts to Base*, but not
// the other way round.
struct Base {};
struct Derived : public Base {};

// Move-only deleter for a single heap-allocated T.
//
// An instance can only be created through the explicit int constructor or by
// moving from another instance. Default construction and copying are disabled
// with `= delete` (matching no_move_deleter) instead of private, undefined
// declarations, so any misuse is diagnosed at compile time rather than at link
// time.
template <class T>
class MoveDeleter {
public:
  MoveDeleter()                   = delete;
  MoveDeleter(MoveDeleter const&) = delete;

  // Move construction is allowed; the deleter carries no state to transfer.
  MoveDeleter(MoveDeleter&&) {}

  // The int argument is ignored; it only provides a usable constructor.
  explicit MoveDeleter(int) {}

  // Releases ptr with a non-array delete expression.
  void operator()(T* ptr) { delete ptr; }
};

// LWG 3233. Broken requirements for shared_ptr converting constructors
// https://cplusplus.github.io/LWG/issue3233
// https://llvm.org/PR60258
// Invalid constructor SFINAE for std::shared_ptr's array ctors
//
// Each assertion below states whether std::shared_ptr<T> is constructible from
// the listed (pointer, deleter, allocator) argument types. A leading '!' marks
// a combination that is required to be rejected.
//
// Non-array element types.
static_assert( std::is_constructible<std::shared_ptr<int>, int*, test_deleter<int>, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int>, int*, bad_deleter, test_allocator<int> >::value, "");
static_assert( std::is_constructible<std::shared_ptr<Base>, Derived*, test_deleter<Base>, test_allocator<Base> >::value, "");
static_assert( std::is_constructible<std::shared_ptr<base>, derived*, test_deleter<base>, test_allocator<base> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<A>, int*, test_deleter<A>, test_allocator<A> >::value, "");

#if TEST_STD_VER >= 17
// Array element types (int[] and int[5]); only checked when TEST_STD_VER >= 17.
// An int* argument is accepted, a pointer-to-array argument (int(*)[] or
// int(*)[5]) is rejected, and bad_deleter is rejected in every case.
static_assert( std::is_constructible<std::shared_ptr<int[]>, int*, test_deleter<int>, test_allocator<int>>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, int*, bad_deleter, test_allocator<int>>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, int(*)[], test_deleter<int>, test_allocator<int>>::value, "");
static_assert( std::is_constructible<std::shared_ptr<int[5]>, int*, test_deleter<int>, test_allocator<int>>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, int*, bad_deleter, test_allocator<int>>::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, int(*)[5], test_deleter<int>, test_allocator<int>>::value, "");
static_assert( std::is_constructible<std::shared_ptr<int[]>, int*, test_deleter<int>, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, int*, bad_deleter, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[]>, int(*)[], test_deleter<int>, test_allocator<int> >::value, "");
static_assert( std::is_constructible<std::shared_ptr<int[5]>, int*, test_deleter<int>, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, int*, bad_deleter, test_allocator<int> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<int[5]>, int(*)[5], test_deleter<int>, test_allocator<int> >::value, "");
#endif


Expand Down Expand Up @@ -172,7 +143,7 @@ int main(int, char**)
assert(test_deleter<A>::dealloc_count == 1);

{
MoveDeleter<int> d(0);
move_deleter<int> d(0);
std::shared_ptr<int> p2(new int, std::move(d), std::allocator<int>());
std::shared_ptr<int> p3(nullptr, std::move(d), std::allocator<int>());
}
Expand All @@ -189,9 +160,9 @@ int main(int, char**)

// Make sure that we can construct a shared_ptr where the element type and pointer type
// aren't "convertible" but are "compatible".
static_assert(!std::is_constructible<std::shared_ptr<Derived[4]>,
Base[4], test_deleter<Derived[4]>,
test_allocator<Derived[4]> >::value, "");
static_assert(!std::is_constructible<std::shared_ptr<derived[4]>,
base[4], test_deleter<derived[4]>,
test_allocator<derived[4]> >::value, "");
}

return 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H
#define TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H

#include <cstddef>
#include <type_traits>

// Empty tag type; nothing converts to it implicitly.
struct bad_type
{
};

// Deleter whose call operator only accepts a bad_type by value, so it cannot
// be invoked with an ordinary object pointer such as int*.
struct bad_deleter
{
  void operator()(bad_type /*unused*/) {}
};

// Deleter that can be neither copied nor moved: both constructors are
// explicitly deleted. Its call operator accepts an int* and does nothing.
struct no_move_deleter {
  void operator()(int*) {}

  no_move_deleter(no_move_deleter&&)      = delete;
  no_move_deleter(const no_move_deleter&) = delete;
};

// Guard against the type accidentally becoming move-constructible.
static_assert(!std::is_move_constructible<no_move_deleter>::value, "");

// Deleter that may be called with an int* but not with a nullptr literal: the
// std::nullptr_t overload is an exact match for nullptr and is deleted, so
// such a call is ill-formed instead of converting to int*.
struct no_nullptr_deleter {
  void operator()(std::nullptr_t) const = delete;
  void operator()(int*) const {}
};

// Minimal public single-inheritance pair: derived* converts to base*, but not
// the other way round.
struct base
{
};
struct derived : public base
{
};

// Move-only deleter for a single heap-allocated T.
//
// An instance can only be created through the explicit int constructor or by
// moving from another instance. Default construction and copying are disabled
// with `= delete` (matching no_move_deleter) instead of private, undefined
// declarations, so any misuse is diagnosed at compile time rather than at link
// time.
template <class T>
class move_deleter {
public:
  move_deleter()                    = delete;
  move_deleter(move_deleter const&) = delete;

  // Move construction is allowed; the deleter carries no state to transfer.
  move_deleter(move_deleter&&) {}

  // The int argument is ignored; it only provides a usable constructor.
  explicit move_deleter(int) {}

  // Releases ptr with a non-array delete expression.
  void operator()(T* ptr) { delete ptr; }
};

#endif // TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H
17 changes: 9 additions & 8 deletions lld/ELF/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ void InputSection::copyRelocations(uint8_t *buf,
auto *p = reinterpret_cast<typename ELFT::Rela *>(buf);
buf += sizeof(RelTy);

if (RelTy::IsRela)
if (RelTy::HasAddend)
p->r_addend = rel.addend;

// Output section VA is zero for -r, so r_offset is an offset within the
Expand Down Expand Up @@ -452,7 +452,7 @@ void InputSection::copyRelocations(uint8_t *buf,

int64_t addend = rel.addend;
const uint8_t *bufLoc = sec->content().begin() + rel.offset;
if (!RelTy::IsRela)
if (!RelTy::HasAddend)
addend = target.getImplicitAddend(bufLoc, type);

if (config->emachine == EM_MIPS &&
Expand All @@ -471,7 +471,7 @@ void InputSection::copyRelocations(uint8_t *buf,
addend += sec->getFile<ELFT>()->mipsGp0;
}

if (RelTy::IsRela)
if (RelTy::HasAddend)
p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr;
// For SHF_ALLOC sections relocated by REL, append a relocation to
// sec->relocations so that relocateAlloc transitively called by
Expand Down Expand Up @@ -934,7 +934,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
const uint64_t offset = rel.r_offset;
uint8_t *bufLoc = buf + offset;
int64_t addend = getAddend<ELFT>(rel);
if (!RelTy::IsRela)
if (!RelTy::HasAddend)
addend += target.getImplicitAddend(bufLoc, type);

Symbol &sym = f->getRelocTargetSym(rel);
Expand Down Expand Up @@ -1007,10 +1007,11 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
}
}

// For a relocatable link, content relocated by RELA remains unchanged and
// we can stop here, while content relocated by REL referencing STT_SECTION
// needs updating implicit addends.
if (config->relocatable && (RelTy::IsRela || sym.type != STT_SECTION))
// For a relocatable link, content relocated by relocation types with an
// explicit addend, such as RELA, remains unchanged and we can stop here,
// while content relocated by relocation types with an implicit addend, such
// as REL, needs its implicit addend updated when it references STT_SECTION.
if (config->relocatable && (RelTy::HasAddend || sym.type != STT_SECTION))
continue;

// R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC
Expand Down
5 changes: 3 additions & 2 deletions lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,8 @@ int64_t RelocationScanner::computeMipsAddend(const RelTy &rel, RelExpr expr,

// The ABI says that the paired relocation is used only for REL.
// See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
if (RelTy::IsRela)
// This generalises to relocation types with implicit addends.
if (RelTy::HasAddend)
return 0;

RelType type = rel.getType(config->isMips64EL);
Expand Down Expand Up @@ -1448,7 +1449,7 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
return;

RelExpr expr = target->getRelExpr(type, sym, sec->content().data() + offset);
int64_t addend = RelTy::IsRela
int64_t addend = RelTy::HasAddend
? getAddend<ELFT>(rel)
: target->getImplicitAddend(
sec->content().data() + rel.r_offset, type);
Expand Down
Loading