31 changes: 31 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12508,6 +12508,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,

SmallVector<Value*, 4> Ops;
bool IsMaskFCmp = false;
bool IsConjFMA = false;

// Find out if any arguments are required to be integer constant expressions.
unsigned ICEArguments = 0;
Expand Down Expand Up @@ -15046,6 +15047,36 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Builder.SetInsertPoint(End);
return Builder.CreateExtractValue(Call, 0);
}
// vfcmaddcph512_mask and vfmaddcph512_mask share one lowering. The conjugate
// builtin differs only in that it sets IsConjFMA before falling through, so
// the shared code below picks the vfcmadd intrinsic instead of vfmadd.
case X86::BI__builtin_ia32_vfcmaddcph512_mask:
IsConjFMA = true;
LLVM_FALLTHROUGH;
case X86::BI__builtin_ia32_vfmaddcph512_mask: {
// Choose the intrinsic from the flag; it is only true when we arrived via
// the conjugate case label above.
Intrinsic::ID IID = IsConjFMA
? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
: Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
// All builtin operands are forwarded to the intrinsic unchanged.
Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
// Combine the intrinsic result with Ops[0] using Ops[3] as the mask.
// NOTE(review): presumably lanes whose mask bit is clear keep Ops[0] --
// confirm against EmitX86Select.
return EmitX86Select(*this, Ops[3], Call, Ops[0]);
}
// vfcmaddcsh_round_mask and vfmaddcsh_round_mask share one lowering. The
// conjugate builtin only sets IsConjFMA and then falls through.
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
IsConjFMA = true;
LLVM_FALLTHROUGH;
case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
// Choose the conjugate or plain csh intrinsic from the flag set above.
Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
: Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
// All builtin operands are forwarded to the intrinsic unchanged.
Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
// Keep only bit 0 of the mask operand before using it for the select.
// NOTE(review): presumably because only the lowest element is governed by
// the mask in the scalar form -- confirm.
Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
// Combine the intrinsic result with Ops[0] under the one-bit mask.
return EmitX86Select(*this, And, Call, Ops[0]);
}
// vfcmaddcsh_round_mask3 and vfmaddcsh_round_mask3 share one lowering. The
// conjugate builtin only sets IsConjFMA and then falls through.
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
IsConjFMA = true;
LLVM_FALLTHROUGH;
case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
// The mask3 form reuses the same csh intrinsics as the mask form; the
// difference is in how the result is merged afterwards.
Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
: Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
// All builtin operands are forwarded to the intrinsic unchanged.
Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
// Shuffle indices 0-3 address Call and 4-7 address Ops[2], so this yields
// element 0 of Call followed by elements 1-3 of Ops[2].
static constexpr int Mask[] = {0, 5, 6, 7};
return Builder.CreateShuffleVector(Call, Ops[2], Mask);
}
}
}

Expand Down
43 changes: 32 additions & 11 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1560,13 +1560,22 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
bool IsGPUDistribute) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
: "__kmpc_for_static_init_4u")
: (IVSigned ? "__kmpc_for_static_init_8"
: "__kmpc_for_static_init_8u");
StringRef Name;
if (IsGPUDistribute)
Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
: "__kmpc_distribute_static_init_4u")
: (IVSigned ? "__kmpc_distribute_static_init_8"
: "__kmpc_distribute_static_init_8u");
else
Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
: "__kmpc_for_static_init_4u")
: (IVSigned ? "__kmpc_for_static_init_8"
: "__kmpc_for_static_init_8u");

llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
auto *PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
Expand Down Expand Up @@ -2826,7 +2835,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
: OMP_IDENT_WORK_SECTIONS);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::FunctionCallee StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
Expand All @@ -2841,8 +2850,13 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
llvm::Value *UpdatedLocation =
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::FunctionCallee StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
llvm::FunctionCallee StaticInitFunction;
bool isGPUDistribute =
CGM.getLangOpts().OpenMPIsDevice &&
(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
StaticInitFunction = createForStaticInitFunction(
Values.IVSize, Values.IVSigned, isGPUDistribute);

emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
OMPC_SCHEDULE_MODIFIER_unknown, Values);
Expand All @@ -2863,9 +2877,16 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
: OMP_IDENT_WORK_SECTIONS),
getThreadID(CGF, Loc)};
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_for_static_fini),
Args);
if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
Args);
else
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_for_static_fini),
Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -795,9 +795,11 @@ class CGOpenMPRuntime {
llvm::Type *getKmpc_MicroPointerTy();

/// Returns __kmpc_for_static_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
/// size \a IVSize and sign \a IVSigned. Will create a distribute call
/// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize,
bool IVSigned);
bool IVSigned,
bool IsGPUDistribute);

/// Returns __kmpc_dispatch_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1216,8 +1216,14 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {

static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) {
llvm::opt::ArgStringList ASL;
for (const auto *A : Args)
for (const auto *A : Args) {
// Use user's original spelling of flags. For example, use
// `/source-charset:utf-8` instead of `-finput-charset=utf-8` if the user
// wrote the former.
while (A->getAlias())
A = A->getAlias();
A->render(Args, ASL);
}

for (auto I = ASL.begin(), E = ASL.end(); I != E; ++I) {
if (I != ASL.begin())
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1026,7 +1026,7 @@ void ToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}

llvm::SmallVector<std::string, 12>
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs) const {
return {};
}
Expand Down
24 changes: 9 additions & 15 deletions clang/lib/Driver/ToolChains/Gnu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2084,21 +2084,19 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
static const char *const AArch64LibDirs[] = {"/lib64", "/lib"};
static const char *const AArch64Triples[] = {
"aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux",
"aarch64-suse-linux", "aarch64-linux-android"};
"aarch64-suse-linux"};
static const char *const AArch64beLibDirs[] = {"/lib"};
static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu",
"aarch64_be-linux-gnu"};

static const char *const ARMLibDirs[] = {"/lib"};
static const char *const ARMTriples[] = {"arm-linux-gnueabi",
"arm-linux-androideabi"};
static const char *const ARMTriples[] = {"arm-linux-gnueabi"};
static const char *const ARMHFTriples[] = {"arm-linux-gnueabihf",
"armv7hl-redhat-linux-gnueabi",
"armv6hl-suse-linux-gnueabi",
"armv7hl-suse-linux-gnueabi"};
static const char *const ARMebLibDirs[] = {"/lib"};
static const char *const ARMebTriples[] = {"armeb-linux-gnueabi",
"armeb-linux-androideabi"};
static const char *const ARMebTriples[] = {"armeb-linux-gnueabi"};
static const char *const ARMebHFTriples[] = {
"armeb-linux-gnueabihf", "armebv7hl-redhat-linux-gnueabi"};

Expand All @@ -2112,17 +2110,15 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
"x86_64-redhat-linux", "x86_64-suse-linux",
"x86_64-manbo-linux-gnu", "x86_64-linux-gnu",
"x86_64-slackware-linux", "x86_64-unknown-linux",
"x86_64-amazon-linux", "x86_64-linux-android"};
"x86_64-amazon-linux"};
static const char *const X32Triples[] = {"x86_64-linux-gnux32",
"x86_64-pc-linux-gnux32"};
static const char *const X32LibDirs[] = {"/libx32", "/lib"};
static const char *const X86LibDirs[] = {"/lib32", "/lib"};
static const char *const X86Triples[] = {
"i586-linux-gnu", "i686-linux-gnu",
"i686-pc-linux-gnu", "i386-redhat-linux6E",
"i686-redhat-linux", "i386-redhat-linux",
"i586-suse-linux", "i686-montavista-linux",
"i686-linux-android", "i686-gnu",
"i586-linux-gnu", "i686-linux-gnu", "i686-pc-linux-gnu",
"i386-redhat-linux6E", "i686-redhat-linux", "i386-redhat-linux",
"i586-suse-linux", "i686-montavista-linux", "i686-gnu",
};

static const char *const M68kLibDirs[] = {"/lib"};
Expand All @@ -2135,8 +2131,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
"mips-img-linux-gnu", "mipsisa32r6-linux-gnu"};
static const char *const MIPSELLibDirs[] = {"/lib"};
static const char *const MIPSELTriples[] = {
"mipsel-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6el-linux-gnu",
"mipsel-linux-android"};
"mipsel-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6el-linux-gnu"};

static const char *const MIPS64LibDirs[] = {"/lib64", "/lib"};
static const char *const MIPS64Triples[] = {
Expand All @@ -2147,8 +2142,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
static const char *const MIPS64ELTriples[] = {
"mips64el-linux-gnu", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mips64el-linux-gnuabi64",
"mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64",
"mips64el-linux-android"};
"mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64"};

static const char *const MIPSN32LibDirs[] = {"/lib32"};
static const char *const MIPSN32Triples[] = {"mips64-linux-gnuabin32",
Expand Down
26 changes: 14 additions & 12 deletions clang/lib/Driver/ToolChains/HIP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,

if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
false))
llvm::for_each(TC.getHIPDeviceLibs(Args), [&](StringRef BCFile) {
LldArgs.push_back(Args.MakeArgString(BCFile));
llvm::for_each(TC.getHIPDeviceLibs(Args), [&](auto BCFile) {
LldArgs.push_back(Args.MakeArgString(BCFile.Path));
});

const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
Expand Down Expand Up @@ -276,9 +276,10 @@ void HIPToolChain::addClangTargetOptions(
CC1Args.push_back("-fapply-global-visibility-to-externs");
}

llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](auto BCFile) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
});
}

Expand Down Expand Up @@ -359,9 +360,9 @@ VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
return HostTC.computeMSVCVersion(D, Args);
}

llvm::SmallVector<std::string, 12>
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
llvm::SmallVector<std::string, 12> BCLibs;
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
if (DriverArgs.hasArg(options::OPT_nogpulib))
return {};
ArgStringList LibraryPaths;
Expand All @@ -382,7 +383,7 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
llvm::sys::path::append(Path, BCName);
FullName = Path;
if (llvm::sys::fs::exists(FullName)) {
BCLibs.push_back(FullName.str());
BCLibs.push_back(FullName);
return;
}
}
Expand All @@ -409,22 +410,23 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
getDriver().Diag(DiagID);
return {};
} else
BCLibs.push_back(AsanRTL.str());
BCLibs.push_back({AsanRTL.str(), /*ShouldInternalize=*/false});
}

// Add the HIP specific bitcode library.
BCLibs.push_back(RocmInstallation.getHIPPath().str());
BCLibs.push_back(RocmInstallation.getHIPPath());

// Add common device libraries like ocml etc.
BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str()));
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
BCLibs.push_back(StringRef(N));

// Add instrument lib.
auto InstLib =
DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
if (InstLib.empty())
return BCLibs;
if (llvm::sys::fs::exists(InstLib))
BCLibs.push_back(InstLib.str());
BCLibs.push_back(InstLib);
else
getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
}
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/HIP.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public ROCMToolChain {
llvm::opt::ArgStringList &CC1Args) const override;
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<std::string, 12>
llvm::SmallVector<BitCodeLibraryInfo, 12>
getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override;

SanitizerMask getSupportedSanitizers() const override;
Expand Down
119 changes: 52 additions & 67 deletions clang/lib/Headers/avx512fp16intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2941,11 +2941,8 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_sch(__m128h __A,

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
return (__m128h)__builtin_ia32_selectps_128(
__U,
__builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION),
(__v4sf)__A);
return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask(
(__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
Expand All @@ -2957,10 +2954,8 @@ _mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
return (__m128h)_mm_move_ss((__m128)__C,
(__m128)__builtin_ia32_vfcmaddcsh_mask(
(__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U,
_MM_FROUND_CUR_DIRECTION));
return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask3(
(__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fcmadd_round_sch(A, B, C, R) \
Expand All @@ -2969,22 +2964,19 @@ _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
(__mmask8)-1, (int)(R)))

#define _mm_mask_fcmadd_round_sch(A, U, B, C, R) \
((__m128h)__builtin_ia32_selectps_128( \
(__mmask8)(U & 1), \
__builtin_ia32_vfcmaddcsh_mask( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)), \
(__v4sf)(__m128h)(A)))
((__m128h)__builtin_ia32_vfcmaddcsh_round_mask( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)))

#define _mm_maskz_fcmadd_round_sch(U, A, B, C, R) \
((__m128h)__builtin_ia32_vfcmaddcsh_maskz( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)))

#define _mm_mask3_fcmadd_round_sch(A, B, C, U, R) \
((__m128h)_mm_move_ss((__m128)(C), \
(__m128)__builtin_ia32_vfcmaddcsh_mask( \
(__v4sf)(A), (__v4sf)(B), (__v4sf)(C), (U), (R))))
((__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A,
__m128h __B,
Expand All @@ -2996,11 +2988,8 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A,

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
return (__m128h)__builtin_ia32_selectps_128(
__U,
__builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION),
(__v4sf)__A);
return (__m128h)__builtin_ia32_vfmaddcsh_round_mask(
(__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
Expand All @@ -3010,24 +2999,32 @@ _mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
_MM_FROUND_CUR_DIRECTION);
}

// mask3 form of fmadd_sch: reinterprets __A, __B and __C as __v4sf and forwards
// them, together with the mask __U, to __builtin_ia32_vfmaddcsh_round_mask3,
// passing _MM_FROUND_CUR_DIRECTION as the rounding argument. The builtin's
// result is cast back to __m128h.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
return (__m128h)__builtin_ia32_vfmaddcsh_round_mask3(
(__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmadd_round_sch(A, B, C, R) \
((__m128h)__builtin_ia32_vfmaddcsh_mask( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)-1, (int)(R)))

#define _mm_mask_fmadd_round_sch(A, U, B, C, R) \
((__m128h)__builtin_ia32_selectps_128( \
(__mmask8)(U & 1), \
__builtin_ia32_vfmaddcsh_mask( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)), \
(__v4sf)(__m128h)(A)))
((__m128h)__builtin_ia32_vfmaddcsh_round_mask( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)))

#define _mm_maskz_fmadd_round_sch(U, A, B, C, R) \
((__m128h)__builtin_ia32_vfmaddcsh_maskz( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)))

// Rounding-argument variant of the mask3 fmadd_sch operation: calls
// __builtin_ia32_vfmaddcsh_round_mask3 with A, B and C converted through
// __m128h to __v4sf, U converted to __mmask8, and R converted to int and
// passed as the rounding argument.
#define _mm_mask3_fmadd_round_sch(A, B, C, U, R) \
((__m128h)__builtin_ia32_vfmaddcsh_round_mask3( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A,
__m128h __B) {
return (__m128h)__builtin_ia32_vfcmulcsh_mask(
Expand Down Expand Up @@ -3177,24 +3174,21 @@ _mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A,
__m512h __B,
__m512h __C) {
return (__m512h)__builtin_ia32_vfcmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
(__v16sf)__C, (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION);
return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
(__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
return (__m512h)__builtin_ia32_selectps_512(
__U,
__builtin_ia32_vfcmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
(__v16sf)__C, (__mmask16)__U,
_MM_FROUND_CUR_DIRECTION),
(__v16sf)__A);
return (__m512h)__builtin_ia32_vfcmaddcph512_mask(
(__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
return (__m512h)__builtin_ia32_vfcmaddcph512_mask(
return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
(__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
_MM_FROUND_CUR_DIRECTION);
}
Expand All @@ -3207,20 +3201,17 @@ _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
}

#define _mm512_fcmadd_round_pch(A, B, C, R) \
((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)-1, (int)(R)))

#define _mm512_mask_fcmadd_round_pch(A, U, B, C, R) \
((__m512h)__builtin_ia32_selectps_512( \
(__mmask16)(U), \
__builtin_ia32_vfcmaddcph512_mask( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)(U), (int)(R)), \
(__v16sf)(__m512h)(A)))
((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)(U), (int)(R)))

#define _mm512_mask3_fcmadd_round_pch(A, B, C, U, R) \
((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)(U), (int)(R)))

Expand All @@ -3232,26 +3223,23 @@ _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A,
__m512h __B,
__m512h __C) {
return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
(__v16sf)__C, (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION);
return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B,
(__v16sf)__C, (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
return (__m512h)__builtin_ia32_selectps_512(
__U,
__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B, (__v16sf)__C,
(__mmask16)__U,
_MM_FROUND_CUR_DIRECTION),
(__v16sf)__A);
return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
(__v16sf)__C, (__mmask16)__U,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
(__v16sf)__C, (__mmask16)__U,
_MM_FROUND_CUR_DIRECTION);
return (__m512h)__builtin_ia32_vfmaddcph512_mask3(
(__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
_MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
Expand All @@ -3262,20 +3250,17 @@ _mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
}

#define _mm512_fmadd_round_pch(A, B, C, R) \
((__m512h)__builtin_ia32_vfmaddcph512_mask( \
((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)-1, (int)(R)))

#define _mm512_mask_fmadd_round_pch(A, U, B, C, R) \
((__m512h)__builtin_ia32_selectps_512( \
(__mmask16)(U), \
__builtin_ia32_vfmaddcph512_mask( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)(U), (int)(R)), \
(__v16sf)(__m512h)(A)))
((__m512h)__builtin_ia32_vfmaddcph512_mask( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_pch(A, B, C, U, R) \
((__m512h)__builtin_ia32_vfmaddcph512_mask( \
((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)(U), (int)(R)))

Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Lex/LiteralSupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1654,9 +1654,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
///
StringLiteralParser::
StringLiteralParser(ArrayRef<Token> StringToks,
Preprocessor &PP, bool Complain)
Preprocessor &PP)
: SM(PP.getSourceManager()), Features(PP.getLangOpts()),
Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
init(StringToks);
Expand Down
16 changes: 16 additions & 0 deletions clang/lib/Lex/PPDirectives.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2865,6 +2865,12 @@ void Preprocessor::HandleDefineDirective(
if (MacroNameTok.is(tok::eod))
return;

IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
// Issue a final pragma warning if we're redefining a final macro that was
// previously defined and has since been undefined.
if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);

// If we are supposed to keep comments in #defines, reenable comment saving
// mode.
if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
Expand Down Expand Up @@ -2907,6 +2913,12 @@ void Preprocessor::HandleDefineDirective(
// Finally, if this identifier already had a macro defined for it, verify that
// the macro bodies are identical, and issue diagnostics if they are not.
if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
// Final macros are hard-mode: they always warn. Even if the bodies are
// identical. Even if they are in system headers. Even if they are things we
// would silently allow in the past.
if (MacroNameTok.getIdentifierInfo()->isFinal())
emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);

// In Objective-C, ignore attempts to directly redefine the builtin
// definitions of the ownership qualifiers. It's still possible to
// #undef them.
Expand Down Expand Up @@ -2936,6 +2948,7 @@ void Preprocessor::HandleDefineDirective(
// then don't bother calling MacroInfo::isIdenticalTo.
if (!getDiagnostics().getSuppressSystemWarnings() ||
!SourceMgr.isInSystemHeader(DefineTok.getLocation())) {

if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);

Expand Down Expand Up @@ -3013,6 +3026,9 @@ void Preprocessor::HandleUndefDirective() {
auto MD = getMacroDefinition(II);
UndefMacroDirective *Undef = nullptr;

if (II->isFinal())
emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);

// If the macro is not defined, this is a noop undef.
if (const MacroInfo *MI = MD.getMacroInfo()) {
if (!MI->isUsed() && MI->isWarnIfUnused())
Expand Down
48 changes: 45 additions & 3 deletions clang/lib/Lex/Pragma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1990,7 +1990,7 @@ static IdentifierInfo *HandleMacroAnnotationPragma(Preprocessor &PP, Token &Tok,
IdentifierInfo *II = Tok.getIdentifierInfo();

if (!II->hasMacroDefinition()) {
PP.Diag(Tok, diag::err_pp_visibility_non_macro) << II->getName();
PP.Diag(Tok, diag::err_pp_visibility_non_macro) << II;
return nullptr;
}

Expand Down Expand Up @@ -2025,8 +2025,8 @@ struct PragmaDeprecatedHandler : public PragmaHandler {
if (IdentifierInfo *II = HandleMacroAnnotationPragma(
PP, Tok, "#pragma clang deprecated", MessageString)) {
II->setIsDeprecatedMacro(true);
if (!MessageString.empty())
PP.addMacroDeprecationMsg(II, std::move(MessageString));
PP.addMacroDeprecationMsg(II, std::move(MessageString),
Tok.getLocation());
}
}
};
Expand All @@ -2053,6 +2053,47 @@ struct PragmaRestrictExpansionHandler : public PragmaHandler {
}
};

/// "\#pragma clang final(...)"
///
/// The syntax is
/// \code
/// #pragma clang final(MACRO_NAME)
/// \endcode
struct PragmaFinalHandler : public PragmaHandler {
  PragmaFinalHandler() : PragmaHandler("final") {}

  /// Handles the tokens following `#pragma clang final`.
  ///
  /// Expects exactly `(`, an identifier, and `)`. The identifier is lexed
  /// without macro expansion and must currently have a macro definition. Any
  /// violation is diagnosed and the handler returns without marking anything.
  ///
  /// On success the identifier is flagged as final and a source location is
  /// recorded for it with the preprocessor.
  ///
  /// \param PP the preprocessor used for lexing and diagnostics.
  /// \param Introducer unused by this handler.
  /// \param Tok scratch token; on return it holds the last token lexed.
  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &Tok) override {
    PP.Lex(Tok);
    if (Tok.isNot(tok::l_paren)) {
      PP.Diag(Tok, diag::err_expected) << "(";
      return;
    }

    // The macro name itself must not be expanded, so lex it raw.
    PP.LexUnexpandedToken(Tok);
    if (!Tok.is(tok::identifier)) {
      PP.Diag(Tok, diag::err_expected) << tok::identifier;
      return;
    }
    IdentifierInfo *II = Tok.getIdentifierInfo();

    // Only identifiers that name a currently defined macro can be annotated.
    if (!II->hasMacroDefinition()) {
      PP.Diag(Tok, diag::err_pp_visibility_non_macro) << II;
      return;
    }

    PP.Lex(Tok);
    if (Tok.isNot(tok::r_paren)) {
      PP.Diag(Tok, diag::err_expected) << ")";
      return;
    }

    II->setIsFinal(true);
    // Tok is the closing parenthesis here, so that is the location recorded.
    PP.addFinalLoc(II, Tok.getLocation());
  }
};

} // namespace

/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
Expand Down Expand Up @@ -2084,6 +2125,7 @@ void Preprocessor::RegisterBuiltinPragmas() {
AddPragmaHandler("clang", new PragmaAssumeNonNullHandler());
AddPragmaHandler("clang", new PragmaDeprecatedHandler());
AddPragmaHandler("clang", new PragmaRestrictExpansionHandler());
AddPragmaHandler("clang", new PragmaFinalHandler());

// #pragma clang module ...
auto *ModuleHandler = new PragmaNamespace("module");
Expand Down
39 changes: 30 additions & 9 deletions clang/lib/Lex/Preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1409,25 +1409,46 @@ bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
return true;
}

void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) {
auto DepMsg = getMacroDeprecationMsg(Identifier.getIdentifierInfo());
if (!DepMsg)
void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
const MacroAnnotations &A =
getMacroAnnotations(Identifier.getIdentifierInfo());
assert(A.DeprecationInfo &&
"Macro deprecation warning without recorded annotation!");
const MacroAnnotationInfo &Info = *A.DeprecationInfo;
if (Info.Message.empty())
Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
<< Identifier.getIdentifierInfo() << 0;
else
Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
<< Identifier.getIdentifierInfo() << 1 << *DepMsg;
<< Identifier.getIdentifierInfo() << 1 << Info.Message;
Diag(Info.Location, diag::note_pp_macro_annotation) << 0;
}

void Preprocessor::emitMacroUnsafeHeaderWarning(const Token &Identifier) {
auto DepMsg = getRestrictExpansionMsg(Identifier.getIdentifierInfo());
if (DepMsg.first.empty())
void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
const MacroAnnotations &A =
getMacroAnnotations(Identifier.getIdentifierInfo());
assert(A.RestrictExpansionInfo &&
"Macro restricted expansion warning without recorded annotation!");
const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
if (Info.Message.empty())
Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
<< Identifier.getIdentifierInfo() << 0;
else
Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
<< Identifier.getIdentifierInfo() << 1 << DepMsg.first;
Diag(DepMsg.second, diag::note_pp_macro_annotation) << 1;
<< Identifier.getIdentifierInfo() << 1 << Info.Message;
Diag(Info.Location, diag::note_pp_macro_annotation) << 1;
}

void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
bool IsUndef) const {
const MacroAnnotations &A =
getMacroAnnotations(Identifier.getIdentifierInfo());
assert(A.FinalAnnotationLoc &&
"Final macro warning without recorded annotation!");

Diag(Identifier, diag::warn_pragma_final_macro)
<< Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << 2;
}

ModuleLoader::~ModuleLoader() = default;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4129,11 +4129,17 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
case X86::BI__builtin_ia32_vfmaddcsh_mask:
case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
case X86::BI__builtin_ia32_vfmaddcph512_mask:
case X86::BI__builtin_ia32_vfmaddcph512_maskz:
case X86::BI__builtin_ia32_vfmaddcph512_mask3:
case X86::BI__builtin_ia32_vfcmaddcsh_mask:
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
case X86::BI__builtin_ia32_vfcmaddcph512_mask:
case X86::BI__builtin_ia32_vfcmaddcph512_maskz:
case X86::BI__builtin_ia32_vfcmaddcph512_mask3:
case X86::BI__builtin_ia32_vfmulcsh_mask:
case X86::BI__builtin_ia32_vfmulcph512_mask:
case X86::BI__builtin_ia32_vfcmulcsh_mask:
Expand Down
30 changes: 24 additions & 6 deletions clang/test/CodeGen/X86/avx512fp16-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -4086,10 +4086,8 @@ __m128h test_mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 4)
// CHECK: %{{.*}} = extractelement <4 x float> %{{.*}}, i32 0
// CHECK: %{{.*}} = insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
// CHECK: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
// CHECK: %{{.*}} = bitcast <4 x float> %{{.*}} to <8 x half>
return _mm_mask3_fcmadd_sch(__A, __B, __C, __U);
}
Expand Down Expand Up @@ -4119,10 +4117,8 @@ __m128h test_mm_mask3_fcmadd_round_sch(__m128h __A, __m128h __B, __m128h __C, __
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 11)
// CHECK: %{{.*}} = extractelement <4 x float> %{{.*}}, i32 0
// CHECK: %{{.*}} = insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
// CHECK: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
// CHECK: %{{.*}} = bitcast <4 x float> %{{.*}} to <8 x half>
return _mm_mask3_fcmadd_round_sch(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
Expand All @@ -4147,6 +4143,17 @@ __m128h test_mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h
return _mm_maskz_fmadd_sch(__U, __A, __B, __C);
}

// Codegen test for _mm_mask3_fmadd_sch. The expected IR is three
// <8 x half> -> <4 x float> bitcasts, a call to
// @llvm.x86.avx512fp16.mask.vfmadd.csh whose last operand is i32 4, a
// shufflevector with mask <0, 5, 6, 7> (element 0 from its first operand,
// elements 1-3 from its second), and a bitcast back to <8 x half>.
__m128h test_mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm_mask3_fmadd_sch
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 4)
// CHECK: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
// CHECK: %{{.*}} = bitcast <4 x float> %{{.*}} to <8 x half>
return _mm_mask3_fmadd_sch(__A, __B, __C, __U);
}

__m128h test_mm_fmadd_round_sch(__m128h __A, __m128h __B, __m128h __C) {
// CHECK-LABEL: @test_mm_fmadd_round_sch
// CHECK: @llvm.x86.avx512fp16.mask.vfmadd.csh
Expand All @@ -4167,6 +4174,17 @@ __m128h test_mm_maskz_fmadd_round_sch(__mmask8 __U, __m128h __A, __m128h __B, __
return _mm_maskz_fmadd_round_sch(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// Codegen test for _mm_mask3_fmadd_round_sch called with
// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC. The expected IR is three
// <8 x half> -> <4 x float> bitcasts, a call to
// @llvm.x86.avx512fp16.mask.vfmadd.csh whose last operand is i32 11, a
// shufflevector with mask <0, 5, 6, 7> (element 0 from its first operand,
// elements 1-3 from its second), and a bitcast back to <8 x half>.
__m128h test_mm_mask3_fmadd_round_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm_mask3_fmadd_round_sch
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 11)
// CHECK: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
// CHECK: %{{.*}} = bitcast <4 x float> %{{.*}} to <8 x half>
return _mm_mask3_fmadd_round_sch(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m128h test_mm_fcmul_sch(__m128h __A, __m128h __B) {
// CHECK-LABEL: @test_mm_fcmul_sch
// CHECK: @llvm.x86.avx512fp16.mask.vfcmul.csh
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s

; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
; RUN: llvm-lto2 run -thinlto-distributed-indexes %t.o \
; RUN: llvm-lto2 run -thinlto-distributed-indexes -disable-thinlto-funcattrs=0 %t.o \
; RUN: -whole-program-visibility \
; RUN: -verify-machineinstrs=0 \
; RUN: -o %t2.index \
Expand Down Expand Up @@ -36,7 +36,7 @@
; Round trip it through llvm-as
; RUN: llvm-dis %t.o.thinlto.bc -o - | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-DIS
; CHECK-DIS: ^0 = module: (path: "{{.*}}thinlto-distributed-cfi-devirt.ll.tmp.o", hash: ({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}))
; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 18, typeIdInfo: (typeTests: (^2), typeCheckedLoadVCalls: (vFuncId: (^2, offset: 8), vFuncId: (^2, offset: 0))))))
; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 18, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1), typeIdInfo: (typeTests: (^2), typeCheckedLoadVCalls: (vFuncId: (^2, offset: 8), vFuncId: (^2, offset: 0))))))
; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7), wpdResolutions: ((offset: 0, wpdRes: (kind: branchFunnel)), (offset: 8, wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi"))))) ; guid = 7004155349499253778

; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/thinlto-distributed-cfi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s

; RUN: llvm-lto2 run -thinlto-distributed-indexes %t.o \
; RUN: llvm-lto2 run -thinlto-distributed-indexes -disable-thinlto-funcattrs=0 %t.o \
; RUN: -o %t2.index \
; RUN: -r=%t.o,test,px \
; RUN: -r=%t.o,_ZTV1B, \
Expand All @@ -24,7 +24,7 @@
; Round trip it through llvm-as
; RUN: llvm-dis %t.o.thinlto.bc -o - | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=CHECK-DIS
; CHECK-DIS: ^0 = module: (path: "{{.*}}thinlto-distributed-cfi.ll.tmp.o", hash: ({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}))
; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 7, typeIdInfo: (typeTests: (^2)))))
; CHECK-DIS: ^1 = gv: (guid: 8346051122425466633, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 7, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0), typeIdInfo: (typeTests: (^2)))))
; CHECK-DIS: ^2 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: single, sizeM1BitWidth: 0))) ; guid = 7004155349499253778

; RUN: %clang_cc1 -triple x86_64-grtev4-linux-gnu \
Expand Down
43 changes: 43 additions & 0 deletions clang/test/CodeGen/thinlto-funcattr-prop.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
; REQUIRES: x86-registered-target

; Test that FunctionAttr Propagation is generating correct summaries

; RUN: split-file %s %t
; RUN: opt -module-summary %t/a.ll -o %t/a.bc
; RUN: opt -module-summary %t/b.ll -o %t/b.bc

; RUN: llvm-lto2 run -disable-thinlto-funcattrs=0 %t/a.bc %t/b.bc -o %t1.o -save-temps \
; RUN: -r=%t/a.bc,call_extern,plx \
; RUN: -r=%t/a.bc,extern, \
; RUN: -r=%t/b.bc,extern,p

; RUN: llvm-dis %t1.o.index.bc -o - | FileCheck %s --check-prefix=CHECK-INDEX
; RUN: llvm-dis %t1.o.1.1.promote.bc -o - | FileCheck %s --check-prefix=CHECK-IR

;; Summary for call_extern. Note that llvm-lto2 writes out the index before propagation occurs so call_extern doesn't have its flags updated.
; CHECK-INDEX: ^2 = gv: (guid: 13959900437860518209, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0), insts: 2, calls: ((callee: ^3)))))
;; Summary for extern
; CHECK-INDEX: ^3 = gv: (guid: 14959766916849974397, summaries: (function: (module: ^1, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 0, canAutoHide: 0), insts: 1, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 1, mayThrow: 0, hasUnknownCall: 0))))

;--- a.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare void @extern()

; CHECK-IR: Function Attrs: norecurse nounwind
; CHECK-IR-NEXT: define dso_local void @call_extern()
define void @call_extern() {
; The only instruction besides the return is a call to @extern, which this
; module merely declares; the definition lives in b.ll.
call void @extern()
ret void
}

;--- b.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

attributes #0 = { nounwind norecurse }

; Empty function tagged with attribute group #0 (nounwind norecurse); it is
; the callee that a.ll's @call_extern invokes.
define void @extern() #0 {
ret void
}
13 changes: 13 additions & 0 deletions clang/test/CodeGenCUDA/Inputs/amdgpu-asanrtl.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
; Sample code for amdgpu address sanitizer runtime.

; Note the runtime functions need to have weak linkage and default
; visibility, otherwise they may be internalized and removed by GlobalOptPass.

; Weak function whose body is a single tail call to @__asan_report_load1
; with a constant 0 argument, so it holds a reference to that function.
define weak void @__amdgpu_device_library_preserve_asan_functions() {
tail call void @__asan_report_load1(i64 0)
ret void
}

; Weak stub: takes one i64 argument, does not use it, and returns immediately.
define weak void @__asan_report_load1(i64 %0) {
ret void
}
22 changes: 19 additions & 3 deletions clang/test/CodeGenCUDA/amdgpu-asan.cu
Original file line number Diff line number Diff line change
@@ -1,15 +1,31 @@
// Create a sample address sanitizer bitcode library.

// RUN: %clang_cc1 -x ir -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
// RUN: -disable-llvm-passes -o %t.asanrtl.bc %S/Inputs/amdgpu-asanrtl.ll

// Check sanitizer runtime library functions survive
// optimizations without being removed or parameters altered.

// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
// RUN: -mlink-bitcode-file %t.asanrtl.bc -x hip \
// RUN: | FileCheck -check-prefix=ASAN %s

// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
// RUN: -x hip | FileCheck -check-prefix=ASAN %s
// RUN: -O3 -mlink-bitcode-file %t.asanrtl.bc -x hip \
// RUN: | FileCheck -check-prefix=ASAN %s

// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -x hip \
// RUN: | FileCheck %s

// REQUIRES: amdgpu-registered-target

// ASAN-DAG: declare void @__amdgpu_device_library_preserve_asan_functions()
// ASAN-DAG: define weak void @__amdgpu_device_library_preserve_asan_functions()
// ASAN-DAG: @__amdgpu_device_library_preserve_asan_functions_ptr = weak addrspace(1) constant void ()* @__amdgpu_device_library_preserve_asan_functions
// ASAN-DAG: @llvm.compiler.used = {{.*}}@__amdgpu_device_library_preserve_asan_functions_ptr
// ASAN-DAG: define weak void @__asan_report_load1(i64 %{{.*}})

// CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions_ptr
// CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions
// CHECK-NOT: @__asan_report_load1
8 changes: 0 additions & 8 deletions clang/test/Driver/android-gcc-toolchain.c

This file was deleted.

6 changes: 3 additions & 3 deletions clang/test/Driver/compilation_database.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: mkdir -p %t.workdir && cd %t.workdir
// RUN: %clang -MD -MP --sysroot=somewhere -c -x c %s -xc++ %s -Wall -MJ - -no-canonical-prefixes 2>&1 | FileCheck %s
// RUN: %clang -fintegrated-as -MD -MP --sysroot=somewhere -c -x c %s -xc++ %s -Wall -MJ - -no-canonical-prefixes 2>&1 | FileCheck %s
// RUN: not %clang -c -x c %s -MJ %s/non-existant -no-canonical-prefixes 2>&1 | FileCheck --check-prefix=ERROR %s

// CHECK: { "directory": "{{[^"]*}}workdir", "file": "[[SRC:[^"]+[/|\\]compilation_database.c]]", "output": "compilation_database.o", "arguments": ["{{[^"]*}}clang{{[^"]*}}", "-xc", "[[SRC]]", "--sysroot=somewhere", "-c", "-Wall",{{.*}} "--target={{[^"]+}}"]},
// CHECK: { "directory": "{{.*}}", "file": "[[SRC:[^"]+[/|\\]compilation_database.c]]", "output": "compilation_database.o", "arguments": ["{{[^"]*}}clang{{[^"]*}}", "-xc++", "[[SRC]]", "--sysroot=somewhere", "-c", "-Wall",{{.*}} "--target={{[^"]+}}"]},
// CHECK: { "directory": "{{[^"]*}}workdir", "file": "[[SRC:[^"]+[/|\\]compilation_database.c]]", "output": "compilation_database.o", "arguments": ["{{[^"]*}}clang{{[^"]*}}", "-xc", "[[SRC]]", "-fintegrated-as", "--sysroot=somewhere", "-c", "-Wall",{{.*}} "--target={{[^"]+}}"]},
// CHECK: { "directory": "{{.*}}", "file": "[[SRC:[^"]+[/|\\]compilation_database.c]]", "output": "compilation_database.o", "arguments": ["{{[^"]*}}clang{{[^"]*}}", "-xc++", "[[SRC]]", "-fintegrated-as", "--sysroot=somewhere", "-c", "-Wall",{{.*}} "--target={{[^"]+}}"]},
// ERROR: error: compilation database '{{.*}}/non-existant' could not be opened:

int main(void) {
Expand Down
24 changes: 24 additions & 0 deletions clang/test/Driver/crash-report-clang-cl.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: rm -rf %t
// RUN: mkdir %t

// RUN: env TMPDIR=%t TEMP=%t TMP=%t RC_DEBUG_OPTIONS=1 \
// RUN: not %clang_cl -fsyntax-only /Brepro /source-charset:utf-8 \
// RUN: -- %s 2>&1 | FileCheck %s
// RUN: cat %t/crash-report-*.sh | FileCheck --check-prefix=CHECKSH %s

// REQUIRES: crash-recovery

#pragma clang __debug crash

// CHECK: Preprocessed source(s) and associated run script(s) are located at:
// CHECK-NEXT: note: diagnostic msg: {{.*}}crash-report-clang-cl-{{.*}}.c
// CHECKSH: # Crash reproducer
// CHECKSH-NEXT: # Driver args: {{.*}}"-fsyntax-only"
// CHECKSH-SAME: /Brepro
// CHECKSH-SAME: /source-charset:utf-8
// CHECKSH-NOT: -mno-incremental-linker-compatible
// CHECKSH-NOT: -finput-charset=utf-8
// CHECKSH-NEXT: # Original command: {{.*$}}
// CHECKSH-NEXT: "-cc1"
// CHECKSH: "-main-file-name" "crash-report-clang-cl.c"
// CHECKSH: "crash-report-{{[^ ]*}}.c"
2 changes: 1 addition & 1 deletion clang/test/Driver/hip-sanitize-options.hip
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
// CHECK-NOT: {{"[^"]*lld(\.exe){0,1}".* ".*hip.bc"}}
// CHECK: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}

// NORDC: {{"[^"]*clang[^"]*".* "-emit-obj".* "-fcuda-is-device".* "-fsanitize=address".*}} "-o" "[[OUT:[^"]*.o]]"
// NORDC: {{"[^"]*clang[^"]*".* "-emit-obj".* "-fcuda-is-device".* "-mlink-bitcode-file" ".*asanrtl.bc".* "-mlink-builtin-bitcode" ".*hip.bc".* "-fsanitize=address".*}} "-o" "[[OUT:[^"]*.o]]"
// NORDC: {{"[^"]*lld(\.exe){0,1}".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}}
// NORDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}}

Expand Down
4 changes: 4 additions & 0 deletions clang/test/Lexer/Inputs/final-macro.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// expected-warning@+2{{macro 'Foo' has been marked as deprecated}}
// expected-warning@+1{{macro 'Foo' has been marked as unsafe for use in headers}}
#if Foo
#endif
2 changes: 1 addition & 1 deletion clang/test/Lexer/Inputs/unsafe-macro.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// expected-error@+1{{expected identifier}}
#pragma clang restrict_expansion(4

// expected-error@+1{{no macro named foo}}
// expected-error@+1{{no macro named 'foo'}}
#pragma clang restrict_expansion(foo)


Expand Down
13 changes: 11 additions & 2 deletions clang/test/Lexer/deprecate-macro.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,26 @@
// expected-error@+1{{expected identifier}}
#pragma clang deprecated(4

// expected-error@+1{{no macro named foo}}
// expected-error@+1{{no macro named 'foo'}}
#pragma clang deprecated(foo)

#define bar 1
// expected-note@+1{{macro marked 'deprecated' here}}
#pragma clang deprecated(bar, "bar is deprecated use 1")

// expected-warning@+1{{macro 'bar' has been marked as deprecated: bar is deprecated use 1}}
#if bar
#endif

#define foo 1
// expected-note@+8{{macro marked 'deprecated' here}}
// expected-note@+7{{macro marked 'deprecated' here}}
// expected-note@+6{{macro marked 'deprecated' here}}
// expected-note@+5{{macro marked 'deprecated' here}}
// expected-note@+4{{macro marked 'deprecated' here}}
// expected-note@+3{{macro marked 'deprecated' here}}
// expected-note@+2{{macro marked 'deprecated' here}}
// expected-note@+1{{macro marked 'deprecated' here}}
#pragma clang deprecated(foo)

// expected-error@+1{{expected )}}
Expand All @@ -39,7 +48,7 @@
#endif

int main(int argc, char** argv) {
// expected-error@+1{{no macro named main}}
// expected-error@+1{{no macro named 'main'}}
#pragma clang deprecated(main)

// expected-warning@+1{{macro 'foo' has been marked as deprecated}}
Expand Down
45 changes: 45 additions & 0 deletions clang/test/Lexer/final-macro.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// RUN: %clang_cc1 -Wfinal-macro %s -fsyntax-only -verify

// Test warning production
#define Foo 1
// expected-note@+1 4{{macro marked 'final' here}}
#pragma clang final(Foo)

// expected-warning@+2{{macro 'Foo' has been marked as final and should not be redefined}}
// expected-note@+1{{previous definition is here}}
#define Foo 1

// expected-warning@+2{{macro 'Foo' has been marked as final and should not be redefined}}
// expected-warning@+1{{'Foo' macro redefined}}
#define Foo 2

// expected-warning@+1{{redefining builtin macro}}
#define __TIME__ 1

// expected-warning@+1{{undefining builtin macro}}
#undef __TIMESTAMP__

// expected-warning@+1{{macro 'Foo' has been marked as final and should not be undefined}}
#undef Foo
// expected-warning@+1{{macro 'Foo' has been marked as final and should not be redefined}}
#define Foo 3

// Test parse errors
// expected-error@+1{{expected (}}
#pragma clang final

// expected-error@+1{{expected )}}
#pragma clang final(Foo

// expected-error@+1{{no macro named 'Baz'}}
#pragma clang final(Baz)

// expected-error@+1{{expected identifier}}
#pragma clang final(4)

// expected-error@+1{{expected (}}
#pragma clang final Baz

// no diagnostics triggered by these pragmas.
#pragma clang deprecated(Foo)
#pragma clang restrict_expansion(Foo)
15 changes: 14 additions & 1 deletion clang/test/Lexer/pedantic-macro-interplay.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,17 @@
// not-expected-warning@+1{{macro 'UNSAFE_MACRO_2' has been marked as deprecated: Don't use this!}}
#pragma clang restrict_expansion(UNSAFE_MACRO_2, "Don't use this!")

// expected-no-diagnostics

#define Foo 1
#pragma clang final(Foo)
// expected-note@+2{{macro marked 'deprecated' here}}
// expected-note@+1{{macro marked 'deprecated' here}}
#pragma clang deprecated(Foo)
// expected-note@+1{{macro marked 'restrict_expansion' here}}
#pragma clang restrict_expansion(Foo)

// Test that unsafe_header and deprecated markings stick around after the undef
#include "Inputs/final-macro.h"

// expected-warning@+1{{macro 'Foo' has been marked as deprecated}}
const int X = Foo;

Large diffs are not rendered by default.

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
Expand Down Expand Up @@ -656,7 +656,7 @@ int bar(int n){
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[I]], i64 4)
// CHECK1-NEXT: ret void
//
Expand Down Expand Up @@ -737,7 +737,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
Expand Down Expand Up @@ -778,7 +778,7 @@ int bar(int n){
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[I]], i32 4)
// CHECK2-NEXT: ret void
//
Expand Down Expand Up @@ -859,7 +859,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
Expand Down Expand Up @@ -900,7 +900,7 @@ int bar(int n){
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[I]], i32 4)
// CHECK3-NEXT: ret void
//
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
Expand Down Expand Up @@ -206,7 +206,7 @@ int main(int argc, char **argv) {
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]])
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -333,7 +333,7 @@ int main(int argc, char **argv) {
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -420,7 +420,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
Expand Down Expand Up @@ -506,7 +506,7 @@ int main(int argc, char **argv) {
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -627,7 +627,7 @@ int main(int argc, char **argv) {
// CHECK2: omp.dispatch.end:
// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -714,7 +714,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
Expand Down Expand Up @@ -800,7 +800,7 @@ int main(int argc, char **argv) {
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
Expand Down Expand Up @@ -921,7 +921,7 @@ int main(int argc, char **argv) {
// CHECK3: omp.dispatch.end:
// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
Expand Down Expand Up @@ -1014,7 +1014,7 @@ int main(int argc, char **argv) {
// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
Expand Down Expand Up @@ -1104,7 +1104,7 @@ int main(int argc, char **argv) {
// CHECK4: omp.loop.exit:
// CHECK4-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]])
// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]])
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: ret void
Expand Down Expand Up @@ -1231,7 +1231,7 @@ int main(int argc, char **argv) {
// CHECK4: omp.dispatch.end:
// CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: ret void
Expand Down Expand Up @@ -1318,7 +1318,7 @@ int main(int argc, char **argv) {
// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK5-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
Expand Down Expand Up @@ -1404,7 +1404,7 @@ int main(int argc, char **argv) {
// CHECK5: omp.loop.exit:
// CHECK5-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4
// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
// CHECK5-NEXT: br label [[OMP_PRECOND_END]]
// CHECK5: omp.precond.end:
// CHECK5-NEXT: ret void
Expand Down Expand Up @@ -1525,7 +1525,7 @@ int main(int argc, char **argv) {
// CHECK5: omp.dispatch.end:
// CHECK5-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK5-NEXT: br label [[OMP_PRECOND_END]]
// CHECK5: omp.precond.end:
// CHECK5-NEXT: ret void
Expand Down Expand Up @@ -1612,7 +1612,7 @@ int main(int argc, char **argv) {
// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK6-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
// CHECK6-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
Expand Down Expand Up @@ -1698,7 +1698,7 @@ int main(int argc, char **argv) {
// CHECK6: omp.loop.exit:
// CHECK6-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK6-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4
// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
// CHECK6-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]])
// CHECK6-NEXT: br label [[OMP_PRECOND_END]]
// CHECK6: omp.precond.end:
// CHECK6-NEXT: ret void
Expand Down Expand Up @@ -1819,7 +1819,7 @@ int main(int argc, char **argv) {
// CHECK6: omp.dispatch.end:
// CHECK6-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK6-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]])
// CHECK6-NEXT: br label [[OMP_PRECOND_END]]
// CHECK6: omp.precond.end:
// CHECK6-NEXT: ret void
Expand Down

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,24 +73,24 @@ int bar(int n){
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false)

// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false)

// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false)

// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void

// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]])
Expand All @@ -99,8 +99,8 @@ int bar(int n){
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false)

// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void

#endif
2 changes: 1 addition & 1 deletion clang/tools/scan-build-py/lib/libscanbuild/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

__all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']

scanbuild_dir = os.path.dirname(__import__('sys').argv[0])
scanbuild_dir = os.path.dirname(os.path.realpath(__import__('sys').argv[0]))

COMPILER_WRAPPER_CC = os.path.join(scanbuild_dir, '..', 'libexec', 'analyze-cc')
COMPILER_WRAPPER_CXX = os.path.join(scanbuild_dir, '..', 'libexec', 'analyze-c++')
Expand Down
19 changes: 19 additions & 0 deletions clang/unittests/AST/ASTImporterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,25 @@ TEST_P(ImportExpr, ImportChooseExpr) {
functionDecl(hasDescendant(chooseExpr())));
}

// AST matcher object for ShuffleVectorExpr statements; it is used by the
// ImportShuffleVectorExpr test to locate the imported shuffle expression.
const internal::VariadicDynCastAllOfMatcher<Stmt, ShuffleVectorExpr>
shuffleVectorExpr;

// Import test for __builtin_shufflevector. The snippet defines declToImport,
// which shuffles two 4 x double vectors with the indices 0, 1, 2, 3, and the
// verification pattern looks for a function declaration containing a
// shuffleVectorExpr with all six of those operands.
TEST_P(ImportExpr, ImportShuffleVectorExpr) {
MatchVerifier<Decl> Verifier;
constexpr auto Code = R"code(
typedef double vector4double __attribute__((__vector_size__(32)));
vector4double declToImport(vector4double a, vector4double b) {
return __builtin_shufflevector(a, b, 0, 1, 2, 3);
}
)code";
// The pattern names both parameter references (a, b) and each of the four
// integer-literal indices individually.
const auto Pattern = functionDecl(hasDescendant(shuffleVectorExpr(
allOf(has(declRefExpr(to(parmVarDecl(hasName("a"))))),
has(declRefExpr(to(parmVarDecl(hasName("b"))))),
has(integerLiteral(equals(0))), has(integerLiteral(equals(1))),
has(integerLiteral(equals(2))), has(integerLiteral(equals(3)))))));
// Both language arguments passed to testImport are Lang_C99.
testImport(Code, Lang_C99, "", Lang_C99, Verifier, Pattern);
}

TEST_P(ImportExpr, ImportGNUNullExpr) {
MatchVerifier<Decl> Verifier;
testImport("void declToImport() { (void)__null; }", Lang_CXX03, "",
Expand Down
21 changes: 11 additions & 10 deletions compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ void FillProfileCallback(uptr p, uptr rss, bool file,
void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
uptr mem[MemCount];
internal_memset(mem, 0, sizeof(mem));
GetMemoryProfile(FillProfileCallback, mem, 7);
GetMemoryProfile(FillProfileCallback, mem, MemCount);
auto meta = ctx->metamap.GetMemoryStats();
StackDepotStats *stacks = StackDepotGetStats();
uptr nthread, nlive;
Expand Down Expand Up @@ -453,18 +453,19 @@ static void InitializeLongjmpXorKey() {
}
#endif

extern "C" void __tsan_tls_initialization() {}

void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
// Check that the thr object is in tls;
const uptr thr_beg = (uptr)thr;
const uptr thr_end = (uptr)thr + sizeof(*thr);
CHECK_GE(thr_beg, tls_addr);
CHECK_LE(thr_beg, tls_addr + tls_size);
CHECK_GE(thr_end, tls_addr);
CHECK_LE(thr_end, tls_addr + tls_size);
// Since the thr object is huge, skip it.
MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr, thr_beg - tls_addr);
MemoryRangeImitateWrite(thr, /*pc=*/2, thr_end,
tls_addr + tls_size - thr_end);
// ThreadState is normally allocated in TLS and is large,
// so we skip it. But unit tests allocate ThreadState outside of TLS.
if (thr_beg < tls_addr || thr_end >= tls_addr + tls_size)
return;
const uptr pc = StackTrace::GetNextInstructionPc(
reinterpret_cast<uptr>(__tsan_tls_initialization));
MemoryRangeImitateWrite(thr, pc, tls_addr, thr_beg - tls_addr);
MemoryRangeImitateWrite(thr, pc, thr_end, tls_addr + tls_size - thr_end);
}

// Note: this function runs with async signals enabled,
Expand Down
11 changes: 11 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,17 @@ ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
last_sleep_clock(tid)
#endif
{
CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
#if !SANITIZER_GO
shadow_stack_pos = shadow_stack;
shadow_stack_end = shadow_stack + kShadowStackSize;
#else
// Setup dynamic shadow stack.
const int kInitStackSize = 8;
shadow_stack = (uptr *)Alloc(kInitStackSize * sizeof(uptr));
shadow_stack_pos = shadow_stack;
shadow_stack_end = shadow_stack + kInitStackSize;
#endif
}

#if !SANITIZER_GO
Expand Down
8 changes: 5 additions & 3 deletions compiler-rt/lib/tsan/rtl/tsan_rtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,12 @@ struct ThreadState {
#if !SANITIZER_GO
IgnoreSet mop_ignore_set;
IgnoreSet sync_ignore_set;
#endif
// C/C++ uses fixed size shadow stack embed into Trace.
// C/C++ uses fixed size shadow stack.
uptr shadow_stack[kShadowStackSize];
#else
// Go uses malloc-allocated shadow stack with dynamic size.
uptr *shadow_stack;
#endif
uptr *shadow_stack_end;
uptr *shadow_stack_pos;
RawShadow *racy_shadow_addr;
Expand Down Expand Up @@ -221,7 +223,7 @@ struct ThreadState {
explicit ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
unsigned reuse_count, uptr stk_addr, uptr stk_size,
uptr tls_addr, uptr tls_size);
};
} ALIGNED(SANITIZER_CACHE_LINE_SIZE);

#if !SANITIZER_GO
#if SANITIZER_MAC || SANITIZER_ANDROID
Expand Down
36 changes: 18 additions & 18 deletions compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ void ThreadContext::OnCreated(void *arg) {
creation_stack_id = CurrentStackId(args->thr, args->pc);
}

extern "C" void __tsan_stack_initialization() {}

struct OnStartedArgs {
ThreadState *thr;
uptr stk_addr;
Expand All @@ -156,13 +158,6 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
if (thread_type != ThreadType::Fiber)
GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr,
&tls_size);

if (tid != kMainTid) {
if (stk_addr && stk_size)
MemoryRangeImitateWrite(thr, /*pc=*/ 1, stk_addr, stk_size);

if (tls_addr && tls_size) ImitateTlsWrite(thr, tls_addr, tls_size);
}
#endif

ThreadRegistry *tr = &ctx->thread_registry;
Expand All @@ -178,6 +173,22 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
ThreadIgnoreSyncBegin(thr, 0);
}
#endif

#if !SANITIZER_GO
// Don't imitate stack/TLS writes for the main thread,
// because its initialization is synchronized with all
// subsequent threads anyway.
if (tid != kMainTid) {
if (stk_addr && stk_size) {
const uptr pc = StackTrace::GetNextInstructionPc(
reinterpret_cast<uptr>(__tsan_stack_initialization));
MemoryRangeImitateWrite(thr, pc, stk_addr, stk_size);
}

if (tls_addr && tls_size)
ImitateTlsWrite(thr, tls_addr, tls_size);
}
#endif
}

void ThreadContext::OnStarted(void *arg) {
Expand All @@ -190,17 +201,6 @@ void ThreadContext::OnStarted(void *arg) {
new (thr)
ThreadState(ctx, tid, unique_id, epoch0, reuse_count, args->stk_addr,
args->stk_size, args->tls_addr, args->tls_size);
#if !SANITIZER_GO
thr->shadow_stack = &ThreadTrace(thr->tid)->shadow_stack[0];
thr->shadow_stack_pos = thr->shadow_stack;
thr->shadow_stack_end = thr->shadow_stack + kShadowStackSize;
#else
// Setup dynamic shadow stack.
const int kInitStackSize = 8;
thr->shadow_stack = (uptr *)Alloc(kInitStackSize * sizeof(uptr));
thr->shadow_stack_pos = thr->shadow_stack;
thr->shadow_stack_end = thr->shadow_stack + kInitStackSize;
#endif
if (common_flags()->detect_deadlocks)
thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id);
thr->fast_state.SetHistorySize(flags()->history_size);
Expand Down
11 changes: 1 addition & 10 deletions compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ namespace __tsan {
template <typename StackTraceTy>
static void TestStackTrace(StackTraceTy *trace) {
ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0);
uptr stack[128];
thr.shadow_stack = &stack[0];
thr.shadow_stack_pos = &stack[0];
thr.shadow_stack_end = &stack[128];

ObtainCurrentStack(&thr, 0, trace);
EXPECT_EQ(0U, trace->size);
Expand All @@ -48,13 +44,8 @@ static void TestStackTrace(StackTraceTy *trace) {
template<typename StackTraceTy>
static void TestTrim(StackTraceTy *trace) {
ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0);
const uptr kShadowStackSize = 2 * kStackTraceMax;
uptr stack[kShadowStackSize];
thr.shadow_stack = &stack[0];
thr.shadow_stack_pos = &stack[0];
thr.shadow_stack_end = &stack[kShadowStackSize];

for (uptr i = 0; i < kShadowStackSize; ++i)
for (uptr i = 0; i < 2 * kStackTraceMax; ++i)
*thr.shadow_stack_pos++ = 100 + i;

ObtainCurrentStack(&thr, 0, trace);
Expand Down
332 changes: 161 additions & 171 deletions compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,102 +22,115 @@ using namespace v3;

// We need to run all trace tests in a new thread,
// so that the thread trace is empty initially.
static void run_in_thread(void *(*f)(void *), void *arg = nullptr) {
pthread_t th;
pthread_create(&th, nullptr, f, arg);
pthread_join(th, nullptr);
}

#if SANITIZER_MAC
// These tests are currently failing on Mac.
// See https://reviews.llvm.org/D107911 for more details.
# define MAYBE_RestoreAccess DISABLED_RestoreAccess
# define MAYBE_MemoryAccessSize DISABLED_MemoryAccessSize
# define MAYBE_RestoreMutexLock DISABLED_RestoreMutexLock
# define MAYBE_MultiPart DISABLED_MultiPart
#else
# define MAYBE_RestoreAccess RestoreAccess
# define MAYBE_MemoryAccessSize MemoryAccessSize
# define MAYBE_RestoreMutexLock RestoreMutexLock
# define MAYBE_MultiPart MultiPart
#endif
template <uptr N>
struct ThreadArray {
// Fills every slot of `threads`. For each slot this maps storage for a
// ThreadState, registers a thread via ThreadCreate (using cur_thread() as the
// first argument), creates a Processor, wires it to the new state and finally
// calls ThreadStart on it.
ThreadArray() {
for (auto *&thr : threads) {
// Only raw memory is obtained here; no ThreadState constructor is invoked
// in this function.
// NOTE(review): presumably ThreadStart constructs the object in place --
// confirm against the runtime.
thr = static_cast<ThreadState *>(
MmapOrDie(sizeof(ThreadState), "ThreadState"));
Tid tid = ThreadCreate(cur_thread(), 0, 0, true);
Processor *proc = ProcCreate();
ProcWire(proc, thr);
ThreadStart(thr, tid, 0, ThreadType::Regular);
}
}

TEST(Trace, MAYBE_RestoreAccess) {
struct Thread {
static void *Func(void *arg) {
// A basic test with some function entry/exit events,
// some mutex lock/unlock events and some other distracting
// memory events.
ThreadState *thr = cur_thread();
TraceFunc(thr, 0x1000);
TraceFunc(thr, 0x1001);
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001);
TraceMutexUnlock(thr, 0x5000);
TraceFunc(thr);
CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead));
TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002);
TraceFunc(thr, 0x1002);
CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead));
// This is the access we want to find.
// The previous one is equivalent, but RestoreStack must prefer
// the last of the matchig accesses.
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res =
RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid,
thr->epoch, 0x3000, 8, kAccessRead, &stk, &mset, &tag);
CHECK(res);
CHECK_EQ(stk.size, 3);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], 0x1002);
CHECK_EQ(stk.trace[2], 0x2002);
CHECK_EQ(mset.Size(), 2);
CHECK_EQ(mset.Get(0).addr, 0x5001);
CHECK_EQ(mset.Get(0).stack_id, 0x6001);
CHECK_EQ(mset.Get(0).write, true);
CHECK_EQ(mset.Get(1).addr, 0x5002);
CHECK_EQ(mset.Get(1).stack_id, 0x6002);
CHECK_EQ(mset.Get(1).write, false);
CHECK_EQ(tag, kExternalTagNone);
return nullptr;
~ThreadArray() {
for (uptr i = 0; i < N; i++) {
if (threads[i])
Finish(i);
}
};
run_in_thread(Thread::Func);
}

// Tears down the thread stored in slot i: finishes the thread, unwires and
// destroys its Processor, and unmaps the ThreadState storage.
// The slot is set to nullptr before the teardown starts.
void Finish(uptr i) {
auto *thr = threads[i];
threads[i] = nullptr;
// Fetch the processor before ThreadFinish; it is needed afterwards for
// ProcUnwire/ProcDestroy.
Processor *proc = thr->proc();
ThreadFinish(thr);
ProcUnwire(proc, thr);
ProcDestroy(proc);
UnmapOrDie(thr, sizeof(ThreadState));
}

ThreadState *threads[N];
ThreadState *operator[](uptr i) { return threads[i]; }
ThreadState *operator->() { return threads[0]; }
operator ThreadState *() { return threads[0]; }
};

// Records a sequence of function, mutex and memory-access events on a fresh
// thread and then checks that RestoreStack reconstructs the stack and mutex
// set for the last read of 8 bytes at 0x3000.
TEST(Trace, RestoreAccess) {
// A basic test with some function entry/exit events,
// some mutex lock/unlock events and some other distracting
// memory events.
// ThreadArray<1> converts implicitly to its single ThreadState*, so `thr`
// can be passed directly to the Trace* functions.
ThreadArray<1> thr;
TraceFunc(thr, 0x1000);
TraceFunc(thr, 0x1001);
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001);
TraceMutexUnlock(thr, 0x5000);
// NOTE(review): TraceFunc without a pc presumably records a function exit;
// 0x1001 is absent from the expected stack checked below -- confirm.
TraceFunc(thr);
CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead));
TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002);
TraceFunc(thr, 0x1002);
CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead));
// This is the access we want to find.
// The previous one is equivalent, but RestoreStack must prefer
// the last of the matching accesses.
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
// Both locks are taken before RestoreStack is called and stay held until
// the end of the test.
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res =
RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch,
0x3000, 8, kAccessRead, &stk, &mset, &tag);
CHECK(res);
// Expected stack: the two function entries still active plus the pc of the
// matched access (0x2002, not the earlier equivalent access at 0x2000).
CHECK_EQ(stk.size, 3);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], 0x1002);
CHECK_EQ(stk.trace[2], 0x2002);
// Expected mutex set: 0x5000 was unlocked above, so only 0x5001 (kLock,
// write == true) and 0x5002 (kRLock, write == false) remain.
CHECK_EQ(mset.Size(), 2);
CHECK_EQ(mset.Get(0).addr, 0x5001);
CHECK_EQ(mset.Get(0).stack_id, 0x6001);
CHECK_EQ(mset.Get(0).write, true);
CHECK_EQ(mset.Get(1).addr, 0x5002);
CHECK_EQ(mset.Get(1).stack_id, 0x6002);
CHECK_EQ(mset.Get(1).write, false);
CHECK_EQ(tag, kExternalTagNone);
}

TEST(Trace, MAYBE_MemoryAccessSize) {
struct Thread {
struct Params {
uptr access_size, offset, size;
bool res;
int type;
};
static void *Func(void *arg) {
// Test tracing and matching of accesses of different sizes.
const Params *params = static_cast<Params *>(arg);
TEST(Trace, MemoryAccessSize) {
// Test tracing and matching of accesses of different sizes.
struct Params {
uptr access_size, offset, size;
bool res;
};
Params tests[] = {
{1, 0, 1, true}, {4, 0, 2, true},
{4, 2, 2, true}, {8, 3, 1, true},
{2, 1, 1, true}, {1, 1, 1, false},
{8, 5, 4, false}, {4, static_cast<uptr>(-1l), 4, false},
};
for (auto params : tests) {
for (int type = 0; type < 3; type++) {
ThreadArray<1> thr;
Printf("access_size=%zu, offset=%zu, size=%zu, res=%d, type=%d\n",
params->access_size, params->offset, params->size, params->res,
params->type);
ThreadState *thr = cur_thread();
params.access_size, params.offset, params.size, params.res, type);
TraceFunc(thr, 0x1000);
switch (params->type) {
switch (type) {
case 0:
// This should emit compressed event.
CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, params->access_size,
CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, params.access_size,
kAccessRead));
break;
case 1:
// This should emit full event.
CHECK(TryTraceMemoryAccess(thr, 0x2000000, 0x3000,
params->access_size, kAccessRead));
CHECK(TryTraceMemoryAccess(thr, 0x2000000, 0x3000, params.access_size,
kAccessRead));
break;
case 2:
TraceMemoryAccessRange(thr, 0x2000000, 0x3000, params->access_size,
TraceMemoryAccessRange(thr, 0x2000000, 0x3000, params.access_size,
kAccessRead);
break;
}
Expand All @@ -127,105 +140,82 @@ TEST(Trace, MAYBE_MemoryAccessSize) {
MutexSet mset;
uptr tag = kExternalTagNone;
bool res = RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid,
thr->epoch, 0x3000 + params->offset, params->size,
thr->epoch, 0x3000 + params.offset, params.size,
kAccessRead, &stk, &mset, &tag);
CHECK_EQ(res, params->res);
if (params->res) {
CHECK_EQ(res, params.res);
if (params.res) {
CHECK_EQ(stk.size, 2);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], params->type ? 0x2000000 : 0x2000);
CHECK_EQ(stk.trace[1], type ? 0x2000000 : 0x2000);
}
return nullptr;
}
};
Thread::Params tests[] = {
{1, 0, 1, true, 0}, {4, 0, 2, true, 0},
{4, 2, 2, true, 0}, {8, 3, 1, true, 0},
{2, 1, 1, true, 0}, {1, 1, 1, false, 0},
{8, 5, 4, false, 0}, {4, static_cast<uptr>(-1l), 4, false, 0},
};
for (auto params : tests) {
for (params.type = 0; params.type < 3; params.type++)
run_in_thread(Thread::Func, &params);
}
}

TEST(Trace, MAYBE_RestoreMutexLock) {
struct Thread {
static void *Func(void *arg) {
// Check of restoration of a mutex lock event.
ThreadState *thr = cur_thread();
TraceFunc(thr, 0x1000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002);
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid,
thr->epoch, 0x5001, 0, 0, &stk, &mset, &tag);
CHECK(res);
CHECK_EQ(stk.size, 2);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], 0x4002);
CHECK_EQ(mset.Size(), 2);
CHECK_EQ(mset.Get(0).addr, 0x5000);
CHECK_EQ(mset.Get(0).stack_id, 0x6000);
CHECK_EQ(mset.Get(0).write, true);
CHECK_EQ(mset.Get(1).addr, 0x5001);
CHECK_EQ(mset.Get(1).stack_id, 0x6001);
CHECK_EQ(mset.Get(1).write, false);
return nullptr;
}
};
run_in_thread(Thread::Func);
// Records one function entry and three mutex lock events, then asks
// RestoreStack for a kLock event on address 0x5001 and checks the restored
// stack and mutex set.
TEST(Trace, RestoreMutexLock) {
// Check of restoration of a mutex lock event.
ThreadArray<1> thr;
TraceFunc(thr, 0x1000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
// 0x5001 is read-locked twice, from pcs 0x4001 and 0x4002.
TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002);
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid, thr->epoch,
0x5001, 0, 0, &stk, &mset, &tag);
CHECK(res);
// The restored stack is expected to end at 0x4002, the pc of the second
// (latest) lock event on 0x5001.
CHECK_EQ(stk.size, 2);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], 0x4002);
// The expected set holds 0x5000 and 0x5001 with the stack id of the first
// read lock (0x6001); the stack id of the second lock (0x6002) does not
// appear.
CHECK_EQ(mset.Size(), 2);
CHECK_EQ(mset.Get(0).addr, 0x5000);
CHECK_EQ(mset.Get(0).stack_id, 0x6000);
CHECK_EQ(mset.Get(0).write, true);
CHECK_EQ(mset.Get(1).addr, 0x5001);
CHECK_EQ(mset.Get(1).stack_id, 0x6001);
CHECK_EQ(mset.Get(1).write, false);
}

TEST(Trace, MAYBE_MultiPart) {
struct Thread {
static void *Func(void *arg) {
// Check replay of a trace with multiple parts.
ThreadState *thr = cur_thread();
TraceFunc(thr, 0x1000);
TraceFunc(thr, 0x2000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event);
for (uptr i = 0; i < kEvents; i++) {
TraceFunc(thr, 0x3000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002);
TraceMutexUnlock(thr, 0x5002);
TraceFunc(thr);
}
TraceFunc(thr, 0x4000);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res =
RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid,
thr->epoch, 0x3000, 8, kAccessRead, &stk, &mset, &tag);
CHECK(res);
CHECK_EQ(stk.size, 4);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], 0x2000);
CHECK_EQ(stk.trace[2], 0x4000);
CHECK_EQ(stk.trace[3], 0x2002);
CHECK_EQ(mset.Size(), 2);
CHECK_EQ(mset.Get(0).addr, 0x5000);
CHECK_EQ(mset.Get(0).stack_id, 0x6000);
CHECK_EQ(mset.Get(0).write, true);
CHECK_EQ(mset.Get(1).addr, 0x5001);
CHECK_EQ(mset.Get(1).stack_id, 0x6001);
CHECK_EQ(mset.Get(1).write, false);
return nullptr;
}
};
run_in_thread(Thread::Func);
// Emits a few events, then a long run of balanced filler events, then a final
// function entry, read lock and memory access, and checks that RestoreStack
// still reconstructs the stack and mutex set for that last access.
TEST(Trace, MultiPart) {
// Check replay of a trace with multiple parts.
ThreadArray<1> thr;
TraceFunc(thr, 0x1000);
TraceFunc(thr, 0x2000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
// The iteration count is derived from sizeof(TracePart) / sizeof(Event).
// NOTE(review): presumably chosen so the trace spans several parts --
// confirm against the TracePart layout.
const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event);
for (uptr i = 0; i < kEvents; i++) {
// Each iteration enters 0x3000, locks and unlocks 0x5002, and calls
// TraceFunc without a pc; neither 0x3000 nor 0x5002 appears in the
// expected results below.
TraceFunc(thr, 0x3000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002);
TraceMutexUnlock(thr, 0x5002);
TraceFunc(thr);
}
TraceFunc(thr, 0x4000);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res =
RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch,
0x3000, 8, kAccessRead, &stk, &mset, &tag);
CHECK(res);
// Expected stack: the two entries recorded before the filler loop, the
// entry recorded after it, and the pc of the access itself.
CHECK_EQ(stk.size, 4);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], 0x2000);
CHECK_EQ(stk.trace[2], 0x4000);
CHECK_EQ(stk.trace[3], 0x2002);
// Expected mutex set: the write lock taken before the loop and the read
// lock taken after it.
CHECK_EQ(mset.Size(), 2);
CHECK_EQ(mset.Get(0).addr, 0x5000);
CHECK_EQ(mset.Get(0).stack_id, 0x6000);
CHECK_EQ(mset.Get(0).write, true);
CHECK_EQ(mset.Get(1).addr, 0x5001);
CHECK_EQ(mset.Get(1).stack_id, 0x6001);
CHECK_EQ(mset.Get(1).write, false);
}

} // namespace __tsan
34 changes: 34 additions & 0 deletions compiler-rt/test/tsan/stack_race3.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s

// Race with initial stack initialization:
// there is no explicit second write,
// but the stack variable is published unsafely.
#include "test.h"

long *P;

// Thread body: publishes the address of its stack variable X through the
// global P with a relaxed atomic store, without ever writing X itself.
// It then meets main at two barriers; main performs its write through P
// between them, so this frame (and X) is still live at that point.
void *Thread(void *a) {
long X;
__atomic_store_n(&P, &X, __ATOMIC_RELAXED);
barrier_wait(&barrier);
barrier_wait(&barrier);
return 0;
}

// Starts the thread, waits until it has published &X, then writes through the
// published pointer. The expectations at the end of the file require this
// write to be reported as racing with a previous write attributed to
// __tsan_stack_initialization.
// NOTE(review): barrier, barrier_init and barrier_wait are not defined in
// this file; presumably they come from test.h -- confirm.
int main() {
barrier_init(&barrier, 2);
pthread_t t;
pthread_create(&t, NULL, Thread, NULL);
// After the first barrier the thread has already stored &X into P.
barrier_wait(&barrier);
long *p = __atomic_load_n(&P, __ATOMIC_RELAXED);
// The racy write into the other thread's stack.
*p = 42;
// Release the thread so it can return, then join it.
barrier_wait(&barrier);
pthread_join(t, 0);
}

// CHECK: WARNING: ThreadSanitizer: data race
// CHECK: Write of size 8 at {{.*}} by main thread:
// CHECK: #0 main
// CHECK: Previous write of size 8 at {{.*}} by thread T1:
// CHECK: #0 __tsan_stack_initialization
// CHECK: Location is stack of thread T1
35 changes: 35 additions & 0 deletions compiler-rt/test/tsan/tls_race3.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// RUN: %clangxx_tsan %darwin_min_target_with_tls_support -O1 %s -o %t && \
// RUN: %deflake %run %t | FileCheck %s

// Race with initial TLS initialization:
// there is no explicit second write,
// but the TLS variable is published unsafely.
#include "test.h"

__thread long X;
long *P;

// Thread body: publishes the address of this thread's copy of the __thread
// variable X through the global P with a relaxed atomic store, without ever
// writing X itself. It then meets main at two barriers; main performs its
// write through P between them, while this thread is still running.
void *Thread(void *a) {
__atomic_store_n(&P, &X, __ATOMIC_RELAXED);
barrier_wait(&barrier);
barrier_wait(&barrier);
return 0;
}

// Starts the thread, waits until it has published the address of its TLS
// variable, then writes through the published pointer. The expectations at the
// end of the file require this write to be reported as racing with a previous
// write attributed to __tsan_tls_initialization.
// NOTE(review): barrier, barrier_init and barrier_wait are not defined in
// this file; presumably they come from test.h -- confirm.
int main() {
barrier_init(&barrier, 2);
pthread_t t;
pthread_create(&t, NULL, Thread, NULL);
// After the first barrier the thread has already stored &X into P.
barrier_wait(&barrier);
long *p = __atomic_load_n(&P, __ATOMIC_RELAXED);
// The racy write into the other thread's TLS.
*p = 42;
// Release the thread so it can return, then join it.
barrier_wait(&barrier);
pthread_join(t, 0);
}

// CHECK: WARNING: ThreadSanitizer: data race
// CHECK: Write of size 8 at {{.*}} by main thread:
// CHECK: #0 main
// CHECK: Previous write of size 8 at {{.*}} by thread T1:
// CHECK: #0 __tsan_tls_initialization
// CHECK: Location is TLS of thread T1.
40 changes: 30 additions & 10 deletions flang/include/flang/Semantics/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,11 @@ class ExpressionAnalyzer {
explicit ExpressionAnalyzer(semantics::SemanticsContext &sc) : context_{sc} {}
ExpressionAnalyzer(semantics::SemanticsContext &sc, FoldingContext &fc)
: context_{sc}, foldingContext_{fc} {}
ExpressionAnalyzer(ExpressionAnalyzer &) = default;
ExpressionAnalyzer(const ExpressionAnalyzer &) = default;

semantics::SemanticsContext &context() const { return context_; }
bool inWhereBody() const { return inWhereBody_; }
void set_inWhereBody(bool yes = true) { inWhereBody_ = yes; }

FoldingContext &GetFoldingContext() const { return foldingContext_; }

Expand Down Expand Up @@ -366,6 +368,7 @@ class ExpressionAnalyzer {
std::map<parser::CharBlock, int> impliedDos_; // values are INTEGER kinds
bool isWholeAssumedSizeArrayOk_{false};
bool useSavedTypedExprs_{true};
bool inWhereBody_{false};
friend class ArgumentAnalyzer;
};

Expand Down Expand Up @@ -402,12 +405,6 @@ evaluate::Expr<evaluate::SubscriptInteger> AnalyzeKindSelector(
SemanticsContext &, common::TypeCategory,
const std::optional<parser::KindSelector> &);

void AnalyzeCallStmt(SemanticsContext &, const parser::CallStmt &);
const evaluate::Assignment *AnalyzeAssignmentStmt(
SemanticsContext &, const parser::AssignmentStmt &);
const evaluate::Assignment *AnalyzePointerAssignmentStmt(
SemanticsContext &, const parser::PointerAssignmentStmt &);

// Semantic analysis of all expressions in a parse tree, which becomes
// decorated with typed representations for top-level expressions.
class ExprChecker {
Expand Down Expand Up @@ -445,18 +442,38 @@ class ExprChecker {
bool Pre(const parser::DataImpliedDo &);

bool Pre(const parser::CallStmt &x) {
AnalyzeCallStmt(context_, x);
exprAnalyzer_.Analyze(x);
return false;
}
bool Pre(const parser::AssignmentStmt &x) {
AnalyzeAssignmentStmt(context_, x);
exprAnalyzer_.Analyze(x);
return false;
}
bool Pre(const parser::PointerAssignmentStmt &x) {
AnalyzePointerAssignmentStmt(context_, x);
exprAnalyzer_.Analyze(x);
return false;
}

// Track whether we're in a WHERE statement or construct body
// Entering a WHERE statement: increments the nesting depth, refreshes the
// analyzer's inWhereBody flag from it, and returns true.
bool Pre(const parser::WhereStmt &) {
++whereDepth_;
exprAnalyzer_.set_inWhereBody(InWhereBody());
return true;
}
// Leaving a WHERE statement: decrements the nesting depth and refreshes the
// analyzer's inWhereBody flag, which stays set while the depth is still
// above zero.
void Post(const parser::WhereStmt &) {
--whereDepth_;
exprAnalyzer_.set_inWhereBody(InWhereBody());
}
// Entering a WHERE body construct: increments the nesting depth, refreshes
// the analyzer's inWhereBody flag from it, and returns true.
bool Pre(const parser::WhereBodyConstruct &) {
++whereDepth_;
exprAnalyzer_.set_inWhereBody(InWhereBody());
return true;
}
// Leaving a WHERE body construct: decrements the nesting depth and refreshes
// the analyzer's inWhereBody flag, which stays set while the depth is still
// above zero.
void Post(const parser::WhereBodyConstruct &) {
--whereDepth_;
exprAnalyzer_.set_inWhereBody(InWhereBody());
}

template <typename A> bool Pre(const parser::Scalar<A> &x) {
exprAnalyzer_.Analyze(x);
return false;
Expand All @@ -479,8 +496,11 @@ class ExprChecker {
}

private:
// True while the WHERE nesting counter is positive, i.e. at least one
// Pre(WhereStmt/WhereBodyConstruct) has not yet been matched by its Post.
bool InWhereBody() const { return whereDepth_ > 0; }

SemanticsContext &context_;
evaluate::ExpressionAnalyzer exprAnalyzer_{context_};
int whereDepth_{0}; // nesting of WHERE statements & constructs
};
} // namespace Fortran::semantics
#endif // FORTRAN_SEMANTICS_EXPRESSION_H_
Loading